); $this->reset_insertion_mode_appropriately(); return $this->step( self::REPROCESS_CURRENT_NODE ); /* * > A start tag whose tag name is one of: "script", "template" * > An end tag whose tag name is "template" */ case '+SCRIPT': case '+TEMPLATE': case '-TEMPLATE': return $this->step_in_head(); } /* * > Anything else * > Parse error: ignore the token. */ return $this->step(); } /** * Parses next element in the 'in select in table' insertion mode. * * This internal function performs the 'in select in table' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * * @since 6.7.0 * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * * @see https://html.spec.whatwg.org/#parsing-main-inselectintable * @see WP_HTML_Processor::step * * @return bool Whether an element was found. */ private function step_in_select_in_table(): bool { $token_name = $this->get_token_name(); $token_type = $this->get_token_type(); $op_sigil = '#tag' === $token_type ? ( parent::is_tag_closer() ? '-' : '+' ) : ''; $op = "{$op_sigil}{$token_name}"; switch ( $op ) { /* * > A start tag whose tag name is one of: "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th" */ case '+CAPTION': case '+TABLE': case '+TBODY': case '+TFOOT': case '+THEAD': case '+TR': case '+TD': case '+TH': // @todo Indicate a parse error once it's possible. $this->state->stack_of_open_elements->pop_until( 'SELECT' ); $this->reset_insertion_mode_appropriately(); return $this->step( self::REPROCESS_CURRENT_NODE ); /* * > An end tag whose tag name is one of: "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th" */ case '-CAPTION': case '-TABLE': case '-TBODY': case '-TFOOT': case '-THEAD': case '-TR': case '-TD': case '-TH': // @todo Indicate a parse error once it's possible. if ( ! 
$this->state->stack_of_open_elements->has_element_in_table_scope( $token_name ) ) { return $this->step(); } $this->state->stack_of_open_elements->pop_until( 'SELECT' ); $this->reset_insertion_mode_appropriately(); return $this->step( self::REPROCESS_CURRENT_NODE ); } /* * > Anything else */ return $this->step_in_select(); } /** * Parses next element in the 'in template' insertion mode. * * This internal function performs the 'in template' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * * @since 6.7.0 Stub implementation. * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * * @see https://html.spec.whatwg.org/#parsing-main-intemplate * @see WP_HTML_Processor::step * * @return bool Whether an element was found. */ private function step_in_template(): bool { $token_name = $this->get_token_name(); $token_type = $this->get_token_type(); $is_closer = $this->is_tag_closer(); $op_sigil = '#tag' === $token_type ? ( $is_closer ? '-' : '+' ) : ''; $op = "{$op_sigil}{$token_name}"; switch ( $op ) { /* * > A character token * > A comment token * > A DOCTYPE token */ case '#text': case '#comment': case '#funky-comment': case '#presumptuous-tag': case 'html': return $this->step_in_body(); /* * > A start tag whose tag name is one of: "base", "basefont", "bgsound", "link", * > "meta", "noframes", "script", "style", "template", "title" * > An end tag whose tag name is "template" */ case '+BASE': case '+BASEFONT': case '+BGSOUND': case '+LINK': case '+META': case '+NOFRAMES': case '+SCRIPT': case '+STYLE': case '+TEMPLATE': case '+TITLE': case '-TEMPLATE': return $this->step_in_head(); /* * > A start tag whose tag name is one of: "caption", "colgroup", "tbody", "tfoot", "thead" */ case '+CAPTION': case '+COLGROUP': case '+TBODY': case '+TFOOT': case '+THEAD': array_pop( $this->state->stack_of_template_insertion_modes ); $this->state->stack_of_template_insertion_modes[] = 
WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; return $this->step( self::REPROCESS_CURRENT_NODE ); /* * > A start tag whose tag name is "col" */ case '+COL': array_pop( $this->state->stack_of_template_insertion_modes ); $this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; return $this->step( self::REPROCESS_CURRENT_NODE ); /* * > A start tag whose tag name is "tr" */ case '+TR': array_pop( $this->state->stack_of_template_insertion_modes ); $this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; return $this->step( self::REPROCESS_CURRENT_NODE ); /* * > A start tag whose tag name is one of: "td", "th" */ case '+TD': case '+TH': array_pop( $this->state->stack_of_template_insertion_modes ); $this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; return $this->step( self::REPROCESS_CURRENT_NODE ); } /* * > Any other start tag */ if ( ! $is_closer ) { array_pop( $this->state->stack_of_template_insertion_modes ); $this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; return $this->step( self::REPROCESS_CURRENT_NODE ); } /* * > Any other end tag */ if ( $is_closer ) { // Parse error: ignore the token. return $this->step(); } /* * > An end-of-file token */ if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) { // Stop parsing. return false; } // @todo Indicate a parse error once it's possible. 
$this->state->stack_of_open_elements->pop_until( 'TEMPLATE' ); $this->state->active_formatting_elements->clear_up_to_last_marker(); array_pop( $this->state->stack_of_template_insertion_modes ); $this->reset_insertion_mode_appropriately(); return $this->step( self::REPROCESS_CURRENT_NODE ); } /** * Parses next element in the 'after body' insertion mode. * * This internal function performs the 'after body' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * * @since 6.7.0 Stub implementation. * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * * @see https://html.spec.whatwg.org/#parsing-main-afterbody * @see WP_HTML_Processor::step * * @return bool Whether an element was found. */ private function step_after_body(): bool { $tag_name = $this->get_token_name(); $token_type = $this->get_token_type(); $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : ''; $op = "{$op_sigil}{$tag_name}"; switch ( $op ) { /* * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF), * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE * * > Process the token using the rules for the "in body" insertion mode. */ case '#text': if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) { return $this->step_in_body(); } goto after_body_anything_else; break; /* * > A comment token */ case '#comment': case '#funky-comment': case '#presumptuous-tag': $this->bail( 'Content outside of BODY is unsupported.' ); break; /* * > A DOCTYPE token */ case 'html': // Parse error: ignore the token. return $this->step(); /* * > A start tag whose tag name is "html" */ case '+HTML': return $this->step_in_body(); /* * > An end tag whose tag name is "html" * * > If the parser was created as part of the HTML fragment parsing algorithm, * > this is a parse error; ignore the token. (fragment case) * > * > Otherwise, switch the insertion mode to "after after body". 
*/ case '-HTML': if ( isset( $this->context_node ) ) { return $this->step(); } $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_BODY; /* * The HTML element is not removed from the stack of open elements. * Only internal state has changed, this does not qualify as a "step" * in terms of advancing through the document to another token. * Nothing has been pushed or popped. * Proceed to parse the next item. */ return $this->step(); } /* * > Parse error. Switch the insertion mode to "in body" and reprocess the token. */ after_body_anything_else: $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; return $this->step( self::REPROCESS_CURRENT_NODE ); } /** * Parses next element in the 'in frameset' insertion mode. * * This internal function performs the 'in frameset' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * * @since 6.7.0 Stub implementation. * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * * @see https://html.spec.whatwg.org/#parsing-main-inframeset * @see WP_HTML_Processor::step * * @return bool Whether an element was found. */ private function step_in_frameset(): bool { $tag_name = $this->get_token_name(); $token_type = $this->get_token_type(); $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : ''; $op = "{$op_sigil}{$tag_name}"; switch ( $op ) { /* * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF), * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE * > * > Insert the character. * * This algorithm effectively strips non-whitespace characters from text and inserts * them under HTML. This is not supported at this time. */ case '#text': if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) { return $this->step_in_body(); } $this->bail( 'Non-whitespace characters cannot be handled in frameset.' 
); break; /* * > A comment token */ case '#comment': case '#funky-comment': case '#presumptuous-tag': $this->insert_html_element( $this->state->current_token ); return true; /* * > A DOCTYPE token */ case 'html': // Parse error: ignore the token. return $this->step(); /* * > A start tag whose tag name is "html" */ case '+HTML': return $this->step_in_body(); /* * > A start tag whose tag name is "frameset" */ case '+FRAMESET': $this->insert_html_element( $this->state->current_token ); return true; /* * > An end tag whose tag name is "frameset" */ case '-FRAMESET': /* * > If the current node is the root html element, then this is a parse error; * > ignore the token. (fragment case) */ if ( $this->state->stack_of_open_elements->current_node_is( 'HTML' ) ) { return $this->step(); } /* * > Otherwise, pop the current node from the stack of open elements. */ $this->state->stack_of_open_elements->pop(); /* * > If the parser was not created as part of the HTML fragment parsing algorithm * > (fragment case), and the current node is no longer a frameset element, then * > switch the insertion mode to "after frameset". */ if ( ! isset( $this->context_node ) && ! $this->state->stack_of_open_elements->current_node_is( 'FRAMESET' ) ) { $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_FRAMESET; } return true; /* * > A start tag whose tag name is "frame" * * > Insert an HTML element for the token. Immediately pop the * > current node off the stack of open elements. * > * > Acknowledge the token's self-closing flag, if it is set. */ case '+FRAME': $this->insert_html_element( $this->state->current_token ); $this->state->stack_of_open_elements->pop(); return true; /* * > A start tag whose tag name is "noframes" */ case '+NOFRAMES': return $this->step_in_head(); } // Parse error: ignore the token. return $this->step(); } /** * Parses next element in the 'after frameset' insertion mode. 
* * This internal function performs the 'after frameset' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * * @since 6.7.0 Stub implementation. * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * * @see https://html.spec.whatwg.org/#parsing-main-afterframeset * @see WP_HTML_Processor::step * * @return bool Whether an element was found. */ private function step_after_frameset(): bool { $tag_name = $this->get_token_name(); $token_type = $this->get_token_type(); $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : ''; $op = "{$op_sigil}{$tag_name}"; switch ( $op ) { /* * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF), * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE * > * > Insert the character. * * This algorithm effectively strips non-whitespace characters from text and inserts * them under HTML. This is not supported at this time. */ case '#text': if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) { return $this->step_in_body(); } $this->bail( 'Non-whitespace characters cannot be handled in after frameset' ); break; /* * > A comment token */ case '#comment': case '#funky-comment': case '#presumptuous-tag': $this->insert_html_element( $this->state->current_token ); return true; /* * > A DOCTYPE token */ case 'html': // Parse error: ignore the token. return $this->step(); /* * > A start tag whose tag name is "html" */ case '+HTML': return $this->step_in_body(); /* * > An end tag whose tag name is "html" */ case '-HTML': $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET; /* * The HTML element is not removed from the stack of open elements. * Only internal state has changed, this does not qualify as a "step" * in terms of advancing through the document to another token. * Nothing has been pushed or popped. * Proceed to parse the next item. 
*/ return $this->step(); /* * > A start tag whose tag name is "noframes" */ case '+NOFRAMES': return $this->step_in_head(); } // Parse error: ignore the token. return $this->step(); } /** * Parses next element in the 'after after body' insertion mode. * * This internal function performs the 'after after body' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * * @since 6.7.0 Stub implementation. * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * * @see https://html.spec.whatwg.org/#the-after-after-body-insertion-mode * @see WP_HTML_Processor::step * * @return bool Whether an element was found. */ private function step_after_after_body(): bool { $tag_name = $this->get_token_name(); $token_type = $this->get_token_type(); $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : ''; $op = "{$op_sigil}{$tag_name}"; switch ( $op ) { /* * > A comment token */ case '#comment': case '#funky-comment': case '#presumptuous-tag': $this->bail( 'Content outside of HTML is unsupported.' ); break; /* * > A DOCTYPE token * > A start tag whose tag name is "html" * * > Process the token using the rules for the "in body" insertion mode. */ case 'html': case '+HTML': return $this->step_in_body(); /* * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF), * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE * > * > Process the token using the rules for the "in body" insertion mode. */ case '#text': if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) { return $this->step_in_body(); } goto after_after_body_anything_else; break; } /* * > Parse error. Switch the insertion mode to "in body" and reprocess the token. 
*/ after_after_body_anything_else: $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; return $this->step( self::REPROCESS_CURRENT_NODE ); } /** * Parses next element in the 'after after frameset' insertion mode. * * This internal function performs the 'after after frameset' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * * @since 6.7.0 Stub implementation. * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * * @see https://html.spec.whatwg.org/#the-after-after-frameset-insertion-mode * @see WP_HTML_Processor::step * * @return bool Whether an element was found. */ private function step_after_after_frameset(): bool { $tag_name = $this->get_token_name(); $token_type = $this->get_token_type(); $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : ''; $op = "{$op_sigil}{$tag_name}"; switch ( $op ) { /* * > A comment token */ case '#comment': case '#funky-comment': case '#presumptuous-tag': $this->bail( 'Content outside of HTML is unsupported.' ); break; /* * > A DOCTYPE token * > A start tag whose tag name is "html" * * > Process the token using the rules for the "in body" insertion mode. */ case 'html': case '+HTML': return $this->step_in_body(); /* * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF), * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE * > * > Process the token using the rules for the "in body" insertion mode. * * This algorithm effectively strips non-whitespace characters from text and inserts * them under HTML. This is not supported at this time. */ case '#text': if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) { return $this->step_in_body(); } $this->bail( 'Non-whitespace characters cannot be handled in after after frameset.' 
); break; /* * > A start tag whose tag name is "noframes" */ case '+NOFRAMES': return $this->step_in_head(); } // Parse error: ignore the token. return $this->step(); } /** * Parses next element in the 'in foreign content' insertion mode. * * This internal function performs the 'in foreign content' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * * @since 6.7.0 Stub implementation. * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * * @see https://html.spec.whatwg.org/#parsing-main-inforeign * @see WP_HTML_Processor::step * * @return bool Whether an element was found. */ private function step_in_foreign_content(): bool { $tag_name = $this->get_token_name(); $token_type = $this->get_token_type(); $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : ''; $op = "{$op_sigil}{$tag_name}"; /* * > A start tag whose name is "font", if the token has any attributes named "color", "face", or "size" * * This section drawn out above the switch to more easily incorporate * the additional rules based on the presence of the attributes. */ if ( '+FONT' === $op && ( null !== $this->get_attribute( 'color' ) || null !== $this->get_attribute( 'face' ) || null !== $this->get_attribute( 'size' ) ) ) { $op = '+FONT with attributes'; } switch ( $op ) { case '#text': /* * > A character token that is U+0000 NULL * * This is handled by `get_modifiable_text()`. */ /* * Whitespace-only text does not affect the frameset-ok flag. * It is probably inter-element whitespace, but it may also * contain character references which decode only to whitespace. */ if ( parent::TEXT_IS_GENERIC === $this->text_node_classification ) { $this->state->frameset_ok = false; } $this->insert_foreign_element( $this->state->current_token, false ); return true; /* * CDATA sections are alternate wrappers for text content and therefore * ought to follow the same rules as text nodes. 
*/ case '#cdata-section': /* * NULL bytes and whitespace do not change the frameset-ok flag. */ $current_token = $this->bookmarks[ $this->state->current_token->bookmark_name ]; $cdata_content_start = $current_token->start + 9; $cdata_content_length = $current_token->length - 12; if ( strspn( $this->html, "\0 \t\n\f\r", $cdata_content_start, $cdata_content_length ) !== $cdata_content_length ) { $this->state->frameset_ok = false; } $this->insert_foreign_element( $this->state->current_token, false ); return true; /* * > A comment token */ case '#comment': case '#funky-comment': case '#presumptuous-tag': $this->insert_foreign_element( $this->state->current_token, false ); return true; /* * > A DOCTYPE token */ case 'html': // Parse error: ignore the token. return $this->step(); /* * > A start tag whose tag name is "b", "big", "blockquote", "body", "br", "center", * > "code", "dd", "div", "dl", "dt", "em", "embed", "h1", "h2", "h3", "h4", "h5", * > "h6", "head", "hr", "i", "img", "li", "listing", "menu", "meta", "nobr", "ol", * > "p", "pre", "ruby", "s", "small", "span", "strong", "strike", "sub", "sup", * > "table", "tt", "u", "ul", "var" * * > A start tag whose name is "font", if the token has any attributes named "color", "face", or "size" * * > An end tag whose tag name is "br", "p" * * Closing BR tags are always reported by the Tag Processor as opening tags. 
*/ case '+B': case '+BIG': case '+BLOCKQUOTE': case '+BODY': case '+BR': case '+CENTER': case '+CODE': case '+DD': case '+DIV': case '+DL': case '+DT': case '+EM': case '+EMBED': case '+H1': case '+H2': case '+H3': case '+H4': case '+H5': case '+H6': case '+HEAD': case '+HR': case '+I': case '+IMG': case '+LI': case '+LISTING': case '+MENU': case '+META': case '+NOBR': case '+OL': case '+P': case '+PRE': case '+RUBY': case '+S': case '+SMALL': case '+SPAN': case '+STRONG': case '+STRIKE': case '+SUB': case '+SUP': case '+TABLE': case '+TT': case '+U': case '+UL': case '+VAR': case '+FONT with attributes': case '-BR': case '-P': // @todo Indicate a parse error once it's possible. foreach ( $this->state->stack_of_open_elements->walk_up() as $current_node ) { if ( 'math' === $current_node->integration_node_type || 'html' === $current_node->integration_node_type || 'html' === $current_node->namespace ) { break; } $this->state->stack_of_open_elements->pop(); } goto in_foreign_content_process_in_current_insertion_mode; } /* * > Any other start tag */ if ( ! $this->is_tag_closer() ) { $this->insert_foreign_element( $this->state->current_token, false ); /* * > If the token has its self-closing flag set, then run * > the appropriate steps from the following list: * > * > ↪ the token's tag name is "script", and the new current node is in the SVG namespace * > Acknowledge the token's self-closing flag, and then act as * > described in the steps for a "script" end tag below. * > * > ↪ Otherwise * > Pop the current node off the stack of open elements and * > acknowledge the token's self-closing flag. * * Since the rules for SCRIPT below indicate to pop the element off of the stack of * open elements, which is the same for the Otherwise condition, there's no need to * separate these checks. The difference comes when a parser operates with the scripting * flag enabled, and executes the script, which this parser does not support. 
*/ if ( $this->state->current_token->has_self_closing_flag ) { $this->state->stack_of_open_elements->pop(); } return true; } /* * > An end tag whose name is "script", if the current node is an SVG script element. */ if ( $this->is_tag_closer() && 'SCRIPT' === $this->state->current_token->node_name && 'svg' === $this->state->current_token->namespace ) { $this->state->stack_of_open_elements->pop(); return true; } /* * > Any other end tag */ if ( $this->is_tag_closer() ) { $node = $this->state->stack_of_open_elements->current_node(); if ( $tag_name !== $node->node_name ) { // @todo Indicate a parse error once it's possible. } in_foreign_content_end_tag_loop: if ( $node === $this->state->stack_of_open_elements->at( 1 ) ) { return true; } /* * > If node's tag name, converted to ASCII lowercase, is the same as the tag name * > of the token, pop elements from the stack of open elements until node has * > been popped from the stack, and then return. */ if ( 0 === strcasecmp( $node->node_name, $tag_name ) ) { foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { $this->state->stack_of_open_elements->pop(); if ( $node === $item ) { return true; } } } foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) { $node = $item; break; } if ( 'html' !== $node->namespace ) { goto in_foreign_content_end_tag_loop; } in_foreign_content_process_in_current_insertion_mode: switch ( $this->state->insertion_mode ) { case WP_HTML_Processor_State::INSERTION_MODE_INITIAL: return $this->step_initial(); case WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HTML: return $this->step_before_html(); case WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD: return $this->step_before_head(); case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD: return $this->step_in_head(); case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD_NOSCRIPT: return $this->step_in_head_noscript(); case WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD: return $this->step_after_head(); case 
WP_HTML_Processor_State::INSERTION_MODE_IN_BODY: return $this->step_in_body(); case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE: return $this->step_in_table(); case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_TEXT: return $this->step_in_table_text(); case WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION: return $this->step_in_caption(); case WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP: return $this->step_in_column_group(); case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY: return $this->step_in_table_body(); case WP_HTML_Processor_State::INSERTION_MODE_IN_ROW: return $this->step_in_row(); case WP_HTML_Processor_State::INSERTION_MODE_IN_CELL: return $this->step_in_cell(); case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT: return $this->step_in_select(); case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE: return $this->step_in_select_in_table(); case WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE: return $this->step_in_template(); case WP_HTML_Processor_State::INSERTION_MODE_AFTER_BODY: return $this->step_after_body(); case WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET: return $this->step_in_frameset(); case WP_HTML_Processor_State::INSERTION_MODE_AFTER_FRAMESET: return $this->step_after_frameset(); case WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_BODY: return $this->step_after_after_body(); case WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET: return $this->step_after_after_frameset(); // This should be unreachable but PHP doesn't have total type checking on switch. default: $this->bail( "Unaware of the requested parsing mode: '{$this->state->insertion_mode}'." ); } } $this->bail( 'Should not have been able to reach end of IN FOREIGN CONTENT processing. Check HTML API code.' ); // This unnecessary return prevents tools from inaccurately reporting type errors. 
return false; } /* * Internal helpers */ /** * Creates a new bookmark for the currently-matched token and returns the generated name. * * @since 6.4.0 * @since 6.5.0 Renamed from bookmark_tag() to bookmark_token(). * * @throws Exception When unable to allocate requested bookmark. * * @return string|false Name of created bookmark, or false if unable to create. */ private function bookmark_token() { if ( ! parent::set_bookmark( ++$this->bookmark_counter ) ) { $this->last_error = self::ERROR_EXCEEDED_MAX_BOOKMARKS; throw new Exception( 'could not allocate bookmark' ); } return "{$this->bookmark_counter}"; } /* * HTML semantic overrides for Tag Processor */ /** * Indicates the namespace of the current token, or "html" if there is none. * * @return string One of "html", "math", or "svg". */ public function get_namespace(): string { if ( ! isset( $this->current_element ) ) { return parent::get_namespace(); } return $this->current_element->token->namespace; } /** * Returns the uppercase name of the matched tag. * * The semantic rules for HTML specify that certain tags be reprocessed * with a different tag name. Because of this, the tag name presented * by the HTML Processor may differ from the one reported by the HTML * Tag Processor, which doesn't apply these semantic rules. * * Example: * * $processor = new WP_HTML_Tag_Processor( '
` stops at tags `TABLE`,
* `TBODY`, `TR`, and `TD`. The `TBODY` and `TR` tags do not appear in
* the original HTML and cannot be used as bookmarks.
*
* @since 6.4.0
*
* @param string $bookmark_name Identifies this particular bookmark.
* @return bool Whether the bookmark was successfully created.
*/
public function set_bookmark( $bookmark_name ): bool {
	/*
	 * Virtual tokens are produced by the parser and have no span in the
	 * input document, so there is nothing for a bookmark to point at.
	 */
	if ( $this->is_virtual() ) {
		_doing_it_wrong(
			__METHOD__,
			__( 'Cannot set bookmarks on tokens that do not appear in the original HTML text.' ),
			'6.8.0'
		);
		return false;
	}

	/*
	 * Caller-supplied names are stored with a leading underscore; has_bookmark()
	 * applies the same prefix when looking them up. This appears to keep them
	 * apart from the counter-based names that bookmark_token() registers
	 * directly through the parent class.
	 */
	return parent::set_bookmark( "_{$bookmark_name}" );
}
/**
 * Reports whether a caller-created bookmark of the given name is present.
 *
 * The lookup is delegated to the parent class using the name with a
 * leading underscore, which is the same form set_bookmark() stores.
 *
 * @since 6.5.0
 *
 * @param string $bookmark_name Name to identify a bookmark that potentially exists.
 * @return bool Whether that bookmark exists.
 */
public function has_bookmark( $bookmark_name ): bool {
	$prefixed_name = "_{$bookmark_name}";

	return parent::has_bookmark( $prefixed_name );
}
/*
* HTML Parsing Algorithms
*/
/**
 * Runs the "close a p element" steps.
 *
 * First generates implied end tags with `P` as the excluded element, then
 * asks the stack of open elements to pop until a `P` element.
 *
 * @since 6.4.0
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#close-a-p-element
 */
private function close_a_p_element(): void {
	$open_elements = $this->state->stack_of_open_elements;

	// Implied end tags are generated first; a P as current node stops that pass.
	$this->generate_implied_end_tags( 'P' );

	// Then unwind the stack to the P element.
	$open_elements->pop_until( 'P' );
}
/**
 * Closes elements that have implied end tags.
 *
 * Pops the current node off the stack of open elements for as long as it
 * is one of DD, DT, LI, OPTGROUP, OPTION, P, RB, RP, RT, or RTC. When an
 * exception element is given, popping stops as soon as the current node
 * is that element.
 *
 * Popping also stops if the stack reports no current node, so that the
 * loop never reads a property from a missing node.
 *
 * @since 6.4.0
 * @since 6.7.0 Full spec support.
 *
 * @see https://html.spec.whatwg.org/#generate-implied-end-tags
 *
 * @param string|null $except_for_this_element Perform as if this element doesn't exist in the stack of open elements.
 */
private function generate_implied_end_tags( ?string $except_for_this_element = null ): void {
	$elements_with_implied_end_tags = array(
		'DD',
		'DT',
		'LI',
		'OPTGROUP',
		'OPTION',
		'P',
		'RB',
		'RP',
		'RT',
		'RTC',
	);

	$open_elements = $this->state->stack_of_open_elements;
	$no_exclusions = ! isset( $except_for_this_element );
	$current_node  = $open_elements->current_node();

	while (
		// Guard: nothing left on the stack to inspect.
		null !== $current_node &&
		( $no_exclusions || ! $open_elements->current_node_is( $except_for_this_element ) ) &&
		in_array( $current_node->node_name, $elements_with_implied_end_tags, true )
	) {
		$open_elements->pop();
		$current_node = $open_elements->current_node();
	}
}
/**
 * Closes elements that have implied end tags, thoroughly.
 *
 * Works like generate_implied_end_tags() but with a larger set of element
 * names (adding CAPTION, COLGROUP, TBODY, TD, TFOOT, TH, THEAD, and TR)
 * and with no way to exclude an element. See the HTML specification for
 * an explanation why this is different from generating end tags in the
 * normal sense.
 *
 * Popping stops if the stack reports no current node, so that the loop
 * never reads a property from a missing node.
 *
 * @since 6.4.0
 * @since 6.7.0 Full spec support.
 *
 * @see WP_HTML_Processor::generate_implied_end_tags
 * @see https://html.spec.whatwg.org/#generate-implied-end-tags
 */
private function generate_implied_end_tags_thoroughly(): void {
	$elements_with_implied_end_tags = array(
		'CAPTION',
		'COLGROUP',
		'DD',
		'DT',
		'LI',
		'OPTGROUP',
		'OPTION',
		'P',
		'RB',
		'RP',
		'RT',
		'RTC',
		'TBODY',
		'TD',
		'TFOOT',
		'TH',
		'THEAD',
		'TR',
	);

	$open_elements = $this->state->stack_of_open_elements;
	$current_node  = $open_elements->current_node();

	while (
		// Guard: nothing left on the stack to inspect.
		null !== $current_node &&
		in_array( $current_node->node_name, $elements_with_implied_end_tags, true )
	) {
		$open_elements->pop();
		$current_node = $open_elements->current_node();
	}
}
/**
 * Finds the adjusted current node.
 *
 * > The adjusted current node is the context element if the parser was created as
 * > part of the HTML fragment parsing algorithm and the stack of open elements
 * > has only one element in it (fragment case); otherwise, the adjusted current
 * > node is the current node.
 *
 * Here the fragment case is detected by a context node being set while
 * the stack of open elements holds exactly one entry.
 *
 * @since 6.7.0
 *
 * @see https://html.spec.whatwg.org/#adjusted-current-node
 *
 * @return WP_HTML_Token|null The adjusted current node.
 */
private function get_adjusted_current_node(): ?WP_HTML_Token {
	$open_elements = $this->state->stack_of_open_elements;

	$use_context_node = isset( $this->context_node ) && 1 === $open_elements->count();

	return $use_context_node
		? $this->context_node
		: $open_elements->current_node();
}
/**
* Reconstructs the active formatting elements.
*
* > This has the effect of reopening all the formatting elements that were opened
* > in the current body, cell, or caption (whichever is youngest) that haven't
* > been explicitly closed.
*
* @since 6.4.0
*
* @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
*
* @see https://html.spec.whatwg.org/#reconstruct-the-active-formatting-elements
*
* @return bool Whether any formatting elements needed to be reconstructed.
*/
private function reconstruct_active_formatting_elements(): bool {
	$formatting_list = $this->state->active_formatting_elements;

	/*
	 * > If there are no entries in the list of active formatting elements, then there is nothing
	 * > to reconstruct; stop this algorithm.
	 */
	if ( 0 === $formatting_list->count() ) {
		return false;
	}

	$newest_entry = $formatting_list->current_node();

	/*
	 * > If the last (most recently added) entry in the list of active formatting elements is a marker;
	 * > stop this algorithm.
	 */
	if ( 'marker' === $newest_entry->node_name ) {
		return false;
	}

	/*
	 * > If the last (most recently added) entry in the list of active formatting elements is an
	 * > element that is in the stack of open elements, then there is nothing to reconstruct;
	 * > stop this algorithm.
	 */
	if ( $this->state->stack_of_open_elements->contains_node( $newest_entry ) ) {
		return false;
	}

	// Anything beyond this point would need real reconstruction, which is not supported.
	$this->bail( 'Cannot reconstruct active formatting elements when advancing and rewinding is required.' );
}
/**
* Runs the reset the insertion mode appropriately algorithm.
*
* @since 6.7.0
*
* @see https://html.spec.whatwg.org/multipage/parsing.html#reset-the-insertion-mode-appropriately
*/
private function reset_insertion_mode_appropriately(): void {
/*
 * Walks the stack of open elements via walk_up() and assigns the insertion
 * mode matching the first HTML-namespace node that one of the rules below
 * recognizes. Falls back to "in body" when no rule matches.
 */
// Capture the first node yielded by walk_down(); the loop below treats it as the first node in the stack of open elements.
$first_node = null;
foreach ( $this->state->stack_of_open_elements->walk_down() as $first_node ) {
break;
}
/*
* > 1. Let _last_ be false.
*/
$last = false;
foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
/*
* > 2. Let _node_ be the last node in the stack of open elements.
* > 3. _Loop_: If _node_ is the first node in the stack of open elements, then set _last_
* > to true, and, if the parser was created as part of the HTML fragment parsing
* > algorithm (fragment case), set node to the context element passed to
* > that algorithm.
* > …
*/
if ( $node === $first_node ) {
$last = true;
if ( isset( $this->context_node ) ) {
$node = $this->context_node;
}
}
// All of the following rules are for matching HTML elements.
if ( 'html' !== $node->namespace ) {
continue;
}
switch ( $node->node_name ) {
/*
* > 4. If node is a `select` element, run these substeps:
* > 1. If _last_ is true, jump to the step below labeled done.
* > 2. Let _ancestor_ be _node_.
* > 3. _Loop_: If _ancestor_ is the first node in the stack of open elements,
* > jump to the step below labeled done.
* > 4. Let ancestor be the node before ancestor in the stack of open elements.
* > …
* > 7. Jump back to the step labeled _loop_.
* > 8. _Done_: Switch the insertion mode to "in select" and return.
*/
case 'SELECT':
if ( ! $last ) {
foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $ancestor ) {
// Only HTML-namespace ancestors are considered.
if ( 'html' !== $ancestor->namespace ) {
continue;
}
switch ( $ancestor->node_name ) {
/*
* > 5. If _ancestor_ is a `template` node, jump to the step below
* > labeled _done_.
*/
case 'TEMPLATE':
// Leaves both the inner switch and the ancestor loop, continuing at the "in select" assignment below.
break 2;
/*
* > 6. If _ancestor_ is a `table` node, switch the insertion mode to
* > "in select in table" and return.
*/
case 'TABLE':
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE;
return;
}
}
}
// The step labeled _done_.
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT;
return;
/*
* > 5. If _node_ is a `td` or `th` element and _last_ is false, then switch the
* > insertion mode to "in cell" and return.
*/
case 'TD':
case 'TH':
if ( ! $last ) {
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CELL;
return;
}
// When _last_ is true no mode is chosen here; only the switch is exited.
break;
/*
* > 6. If _node_ is a `tr` element, then switch the insertion mode to "in row"
* > and return.
*/
case 'TR':
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW;
return;
/*
* > 7. If _node_ is a `tbody`, `thead`, or `tfoot` element, then switch the
* > insertion mode to "in table body" and return.
*/
case 'TBODY':
case 'THEAD':
case 'TFOOT':
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY;
return;
/*
* > 8. If _node_ is a `caption` element, then switch the insertion mode to
* > "in caption" and return.
*/
case 'CAPTION':
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION;
return;
/*
* > 9. If _node_ is a `colgroup` element, then switch the insertion mode to
* > "in column group" and return.
*/
case 'COLGROUP':
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP;
return;
/*
* > 10. If _node_ is a `table` element, then switch the insertion mode to
* > "in table" and return.
*/
case 'TABLE':
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE;
return;
/*
* > 11. If _node_ is a `template` element, then switch the insertion mode to the
* > current template insertion mode and return.
*/
case 'TEMPLATE':
// The current template insertion mode is read as the last entry of the stack of template insertion modes.
$this->state->insertion_mode = end( $this->state->stack_of_template_insertion_modes );
return;
/*
* > 12. If _node_ is a `head` element and _last_ is false, then switch the
* > insertion mode to "in head" and return.
*/
case 'HEAD':
if ( ! $last ) {
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD;
return;
}
// When _last_ is true no mode is chosen here; only the switch is exited.
break;
/*
* > 13. If _node_ is a `body` element, then switch the insertion mode to "in body"
* > and return.
*/
case 'BODY':
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
return;
/*
* > 14. If _node_ is a `frameset` element, then switch the insertion mode to
* > "in frameset" and return. (fragment case)
*/
case 'FRAMESET':
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET;
return;
/*
* > 15. If _node_ is an `html` element, run these substeps:
* > 1. If the head element pointer is null, switch the insertion mode to
* > "before head" and return. (fragment case)
* > 2. Otherwise, the head element pointer is not null, switch the insertion
* > mode to "after head" and return.
*/
case 'HTML':
$this->state->insertion_mode = isset( $this->state->head_element )
? WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD
: WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD;
return;
}
}
/*
* > 16. If _last_ is true, then switch the insertion mode to "in body"
* > and return. (fragment case)
*
* This is only reachable if `$last` is true, as per the fragment parsing case.
*/
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
}
/**
* Runs the adoption agency algorithm.
*
* @since 6.4.0
*
* @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
*
* @see https://html.spec.whatwg.org/#adoption-agency-algorithm
*/
private function run_adoption_agency_algorithm(): void {
/*
 * Handles only the parts of the algorithm that need no DOM restructuring:
 * each pass of the loop below ends in a `return` or a call to bail().
 */
// Hard cap on the number of passes through the outer loop.
$budget = 1000;
// Called _subject_ in the specification text quoted below.
$subject = $this->get_tag();
$current_node = $this->state->stack_of_open_elements->current_node();
if (
// > If the current node is an HTML element whose tag name is subject
$current_node && $subject === $current_node->node_name &&
// > the current node is not in the list of active formatting elements
! $this->state->active_formatting_elements->contains_node( $current_node )
) {
$this->state->stack_of_open_elements->pop();
return;
}
$outer_loop_counter = 0;
while ( $budget-- > 0 ) {
// Return once the counter has reached 8; it is incremented on every pass.
if ( $outer_loop_counter++ >= 8 ) {
return;
}
/*
* > Let formatting element be the last element in the list of active formatting elements that:
* > - is between the end of the list and the last marker in the list,
* > if any, or the start of the list otherwise,
* > - and has the tag name subject.
*/
$formatting_element = null;
foreach ( $this->state->active_formatting_elements->walk_up() as $item ) {
if ( 'marker' === $item->node_name ) {
break;
}
if ( $subject === $item->node_name ) {
$formatting_element = $item;
break;
}
}
// > If there is no such element, then return and instead act as described in the "any other end tag" entry above.
if ( null === $formatting_element ) {
$this->bail( 'Cannot run adoption agency when "any other end tag" is required.' );
}
// > If formatting element is not in the stack of open elements, then this is a parse error; remove the element from the list, and return.
if ( ! $this->state->stack_of_open_elements->contains_node( $formatting_element ) ) {
$this->state->active_formatting_elements->remove_node( $formatting_element );
return;
}
// > If formatting element is in the stack of open elements, but the element is not in scope, then this is a parse error; return.
if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $formatting_element->node_name ) ) {
return;
}
/*
* > Let furthest block be the topmost node in the stack of open elements that is lower in the stack
* > than formatting element, and is an element in the special category. There might not be one.
*/
$is_above_formatting_element = true;
$furthest_block = null;
foreach ( $this->state->stack_of_open_elements->walk_down() as $item ) {
// Skip every node until the formatting element itself is reached (matched by bookmark name).
if ( $is_above_formatting_element && $formatting_element->bookmark_name !== $item->bookmark_name ) {
continue;
}
// Skip the formatting element too; only the nodes visited after it are candidates.
if ( $is_above_formatting_element ) {
$is_above_formatting_element = false;
continue;
}
if ( self::is_special( $item ) ) {
$furthest_block = $item;
break;
}
}
/*
* > If there is no furthest block, then the UA must first pop all the nodes from the bottom of the
* > stack of open elements, from the current node up to and including formatting element, then
* > remove formatting element from the list of active formatting elements, and finally return.
*/
if ( null === $furthest_block ) {
// One node is popped per visited item, stopping once the formatting element itself has been popped.
foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
$this->state->stack_of_open_elements->pop();
if ( $formatting_element->bookmark_name === $item->bookmark_name ) {
$this->state->active_formatting_elements->remove_node( $formatting_element );
return;
}
}
}
// The remaining steps of the algorithm are not implemented here.
$this->bail( 'Cannot extract common ancestor in adoption agency algorithm.' );
}
// Reached only when the loop ends because the iteration budget ran out.
$this->bail( 'Cannot run adoption agency when looping required.' );
}
/**
* Runs the "close the cell" algorithm.
*
* > Where the steps above say to close the cell, they mean to run the following algorithm:
* > 1. Generate implied end tags.
* > 2. If the current node is not now a td element or a th element, then this is a parse error.
* > 3. Pop elements from the stack of open elements stack until a td element or a th element has been popped from the stack.
* > 4. Clear the list of active formatting elements up to the last marker.
* > 5. Switch the insertion mode to "in row".
*
* @see https://html.spec.whatwg.org/multipage/parsing.html#close-the-cell
*
* @since 6.7.0
*/
private function close_cell(): void {
// Step 1: generate implied end tags.
$this->generate_implied_end_tags();
// @todo Parse error if the current node is not a "td" or "th" element.
// Step 3: pop one element per visited node until a TD or TH has been popped.
foreach ( $this->state->stack_of_open_elements->walk_up() as $element ) {
$this->state->stack_of_open_elements->pop();
if ( 'TD' === $element->node_name || 'TH' === $element->node_name ) {
break;
}
}
// Steps 4 and 5: clear active formatting elements up to the last marker and switch to "in row".
$this->state->active_formatting_elements->clear_up_to_last_marker();
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW;
}
/**
* Inserts an HTML element on the stack of open elements.
*
* @since 6.4.0
*
* @see https://html.spec.whatwg.org/#insert-a-foreign-element
*
* @param WP_HTML_Token $token Token to push onto the stack of open elements.
*/
private function insert_html_element( WP_HTML_Token $token ): void {
	// The only bookkeeping done here is pushing the token onto the stack of open elements.
	$open_elements = $this->state->stack_of_open_elements;
	$open_elements->push( $token );
}
/**
* Inserts a foreign element on to the stack of open elements.
*
* @since 6.7.0
*
* @see https://html.spec.whatwg.org/#insert-a-foreign-element
*
* @param WP_HTML_Token $token Insert this token. The token's namespace and
* insertion point will be updated correctly.
* @param bool $only_add_to_element_stack Whether to skip the "insert an element at the adjusted
* insertion location" algorithm when adding this element.
*/
private function insert_foreign_element( WP_HTML_Token $token, bool $only_add_to_element_stack ): void {
	// The new element takes the namespace of the adjusted current node, or "html" when there is none.
	$namespace_source = $this->get_adjusted_current_node();
	if ( $namespace_source ) {
		$token->namespace = $namespace_source->namespace;
	} else {
		$token->namespace = 'html';
	}

	// Record the kind of integration point, if any. MathML is checked first.
	if ( $this->is_mathml_integration_point() ) {
		$token->integration_node_type = 'math';
	} elseif ( $this->is_html_integration_point() ) {
		$token->integration_node_type = 'html';
	}

	/*
	 * @todo Implement the "appropriate place for inserting a node" and the
	 *       "insert an element at the adjusted insertion location" algorithms
	 *       for the case where `$only_add_to_element_stack` is false.
	 *
	 * These algorithms mostly impact DOM tree construction and not the HTML API.
	 * Here, there's no DOM node onto which the element will be appended, so the
	 * parser skips this step regardless of the flag's value.
	 *
	 * @see https://html.spec.whatwg.org/#insert-an-element-at-the-adjusted-insertion-location
	 */

	$this->insert_html_element( $token );
}
/**
* Inserts a virtual element on the stack of open elements.
*
* @since 6.7.0
*
* @param string $token_name Name of token to create and insert into the stack of open elements.
* @param string|null $bookmark_name Optional. Name to give bookmark for created virtual node.
* Defaults to auto-creating a bookmark name.
* @return WP_HTML_Token Newly-created virtual token.
*/
private function insert_virtual_node( $token_name, $bookmark_name = null ): WP_HTML_Token {
	// The virtual node is anchored at the start of the current token and spans zero bytes.
	$current_bookmark = $this->state->current_token->bookmark_name;
	$anchor           = $this->bookmarks[ $current_bookmark ];

	// Use the supplied bookmark name, or generate one when none was given.
	$virtual_bookmark = null === $bookmark_name ? $this->bookmark_token() : $bookmark_name;

	$this->bookmarks[ $virtual_bookmark ] = new WP_HTML_Span( $anchor->start, 0 );

	$virtual_token = new WP_HTML_Token( $virtual_bookmark, $token_name, false );
	$this->insert_html_element( $virtual_token );

	return $virtual_token;
}
/*
* HTML Specification Helpers
*/
/**
* Indicates if the current token is a MathML integration point.
*
* @since 6.7.0
*
* @see https://html.spec.whatwg.org/#mathml-text-integration-point
*
* @return bool Whether the current token is a MathML integration point.
*/
private function is_mathml_integration_point(): bool {
	$token = $this->state->current_token;
	if ( ! isset( $token ) ) {
		return false;
	}

	// Cheap rejection: must be in the MathML namespace and the name must start with "M".
	if ( 'math' !== $token->namespace || 'M' !== $token->node_name[0] ) {
		return false;
	}

	$text_integration_points = array( 'MI', 'MO', 'MN', 'MS', 'MTEXT' );

	return in_array( $token->node_name, $text_integration_points, true );
}
/**
* Indicates if the current token is an HTML integration point.
*
* Note that this method must be an instance method with access
* to the current token, since it needs to examine the attributes
* of the currently-matched tag, if it's in the MathML namespace.
* Otherwise it would be required to scan the HTML and ensure that
* no other accounting is overlooked.
*
* @since 6.7.0
*
* @see https://html.spec.whatwg.org/#html-integration-point
*
* @return bool Whether the current token is an HTML integration point.
*/
private function is_html_integration_point(): bool {
	$token = $this->state->current_token;
	if ( ! isset( $token ) ) {
		return false;
	}

	$namespace = $token->namespace;

	// HTML elements are never HTML integration points.
	if ( 'html' === $namespace ) {
		return false;
	}

	$name = $token->node_name;

	if ( 'svg' === $namespace ) {
		return in_array( $name, array( 'DESC', 'FOREIGNOBJECT', 'TITLE' ), true );
	}

	if ( 'math' !== $namespace ) {
		$this->bail( 'Should not have reached end of HTML Integration Point detection: check HTML API code.' );
		// This unnecessary return prevents tools from inaccurately reporting type errors.
		return false;
	}

	// In MathML only ANNOTATION-XML qualifies, and only with particular `encoding` values.
	if ( 'ANNOTATION-XML' !== $name ) {
		return false;
	}

	$encoding = $this->get_attribute( 'encoding' );
	if ( ! is_string( $encoding ) ) {
		return false;
	}

	// The encoding is matched without regard to letter case.
	return (
		0 === strcasecmp( $encoding, 'application/xhtml+xml' ) ||
		0 === strcasecmp( $encoding, 'text/html' )
	);
}
/**
* Returns whether an element of a given name is in the HTML special category.
*
* @since 6.4.0
*
* @see https://html.spec.whatwg.org/#special
*
* @param WP_HTML_Token|string $tag_name Node to check, or only its name if in the HTML namespace.
* @return bool Whether the element of the given name is in the special category.
*/
public static function is_special( $tag_name ): bool {
	/*
	 * Build the lookup key. Plain strings and HTML-namespace tokens are
	 * upper-cased; tokens in any other namespace become "{namespace} {name}"
	 * exactly as stored on the token.
	 */
	if ( is_string( $tag_name ) ) {
		$lookup_key = strtoupper( $tag_name );
	} elseif ( 'html' === $tag_name->namespace ) {
		$lookup_key = strtoupper( $tag_name->node_name );
	} else {
		$lookup_key = "{$tag_name->namespace} {$tag_name->node_name}";
	}

	static $special_elements = array(
		// HTML.
		'ADDRESS',
		'APPLET',
		'AREA',
		'ARTICLE',
		'ASIDE',
		'BASE',
		'BASEFONT',
		'BGSOUND',
		'BLOCKQUOTE',
		'BODY',
		'BR',
		'BUTTON',
		'CAPTION',
		'CENTER',
		'COL',
		'COLGROUP',
		'DD',
		'DETAILS',
		'DIR',
		'DIV',
		'DL',
		'DT',
		'EMBED',
		'FIELDSET',
		'FIGCAPTION',
		'FIGURE',
		'FOOTER',
		'FORM',
		'FRAME',
		'FRAMESET',
		'H1',
		'H2',
		'H3',
		'H4',
		'H5',
		'H6',
		'HEAD',
		'HEADER',
		'HGROUP',
		'HR',
		'HTML',
		'IFRAME',
		'IMG',
		'INPUT',
		'KEYGEN',
		'LI',
		'LINK',
		'LISTING',
		'MAIN',
		'MARQUEE',
		'MENU',
		'META',
		'NAV',
		'NOEMBED',
		'NOFRAMES',
		'NOSCRIPT',
		'OBJECT',
		'OL',
		'P',
		'PARAM',
		'PLAINTEXT',
		'PRE',
		'SCRIPT',
		'SEARCH',
		'SECTION',
		'SELECT',
		'SOURCE',
		'STYLE',
		'SUMMARY',
		'TABLE',
		'TBODY',
		'TD',
		'TEMPLATE',
		'TEXTAREA',
		'TFOOT',
		'TH',
		'THEAD',
		'TITLE',
		'TR',
		'TRACK',
		'UL',
		'WBR',
		'XMP',
		// MathML.
		'math MI',
		'math MO',
		'math MN',
		'math MS',
		'math MTEXT',
		'math ANNOTATION-XML',
		// SVG.
		'svg DESC',
		'svg FOREIGNOBJECT',
		'svg TITLE',
	);

	return in_array( $lookup_key, $special_elements, true );
}
/**
* Returns whether a given element is an HTML Void Element
*
* > area, base, br, col, embed, hr, img, input, link, meta, source, track, wbr
*
* @since 6.4.0
*
* @see https://html.spec.whatwg.org/#void-elements
*
* @param string $tag_name Name of HTML tag to check.
* @return bool Whether the given tag is an HTML Void Element.
*/
public static function is_void( $tag_name ): bool {
	// Names are compared in upper case, so callers may pass any letter case.
	$upper_name = strtoupper( $tag_name );

	static $void_elements = array(
		'AREA',
		'BASE',
		'BASEFONT', // Obsolete but still treated as void.
		'BGSOUND',  // Obsolete but still treated as void.
		'BR',
		'COL',
		'EMBED',
		'FRAME',
		'HR',
		'IMG',
		'INPUT',
		'KEYGEN',   // Obsolete but still treated as void.
		'LINK',
		'META',
		'PARAM',    // Obsolete but still treated as void.
		'SOURCE',
		'TRACK',
		'WBR',
	);

	return in_array( $upper_name, $void_elements, true );
}
/**
* Gets an encoding from a given string.
*
* This is an algorithm defined in the WHAT-WG specification.
*
* Example:
*
* 'UTF-8' === self::get_encoding( 'utf8' );
* 'UTF-8' === self::get_encoding( " \tUTF-8 " );
* null === self::get_encoding( 'UTF-7' );
* null === self::get_encoding( 'utf8; charset=' );
*
* @see https://encoding.spec.whatwg.org/#concept-encoding-get
*
* @todo As this parser only supports UTF-8, only the UTF-8
* encodings are detected. Add more as desired, but the
* parser will bail on non-UTF-8 encodings.
*
* @since 6.7.0
*
* @param string $label A string which may specify a known encoding.
* @return string|null Known encoding if matched, otherwise null.
*/
protected static function get_encoding( string $label ): ?string {
	/*
	 * > Remove any leading and trailing ASCII whitespace from label.
	 *
	 * The label is also lower-cased so that the comparison below ignores letter case.
	 */
	$normalized_label = strtolower( trim( $label, " \t\f\r\n" ) );

	/*
	 * > If label is an ASCII case-insensitive match for any of the labels listed in the
	 * > table below, then return the corresponding encoding; otherwise return failure.
	 */
	$utf8_labels = array(
		'unicode-1-1-utf-8',
		'unicode11utf8',
		'unicode20utf8',
		'utf-8',
		'utf8',
		'x-unicode20utf8',
	);

	return in_array( $normalized_label, $utf8_labels, true ) ? 'UTF-8' : null;
}
/*
* Constants that would pollute the top of the class if they were found there.
*/
/**
* Indicates that the next HTML token should be parsed and processed.
*
* @since 6.4.0
*
* @var string
*/
const PROCESS_NEXT_NODE = 'process-next-node';
/**
* Indicates that the current HTML token should be reprocessed in the newly-selected insertion mode.
*
* @since 6.4.0
*
* @var string
*/
const REPROCESS_CURRENT_NODE = 'reprocess-current-node';
/**
* Indicates that the current HTML token should be processed without advancing the parser.
*
* @since 6.5.0
*
* @var string
*/
const PROCESS_CURRENT_NODE = 'process-current-node';
/**
* Indicates that the parser encountered unsupported markup and has bailed.
*
* @since 6.4.0
*
* @var string
*/
const ERROR_UNSUPPORTED = 'unsupported';
/**
* Indicates that the parser encountered more HTML tokens than it
* was able to process and has bailed.
*
* @since 6.4.0
*
* @var string
*/
const ERROR_EXCEEDED_MAX_BOOKMARKS = 'exceeded-max-bookmarks';
/**
* Unlock code that must be passed into the constructor to create this class.
*
* This class extends the WP_HTML_Tag_Processor, which has a public class
* constructor. Therefore, it's not possible to have a private constructor here.
*
* This unlock code is used to ensure that anyone calling the constructor is
* doing so with a full understanding that it's intended to be a private API.
*
* @access private
*
* @var string
*/
const CONSTRUCTOR_UNLOCK_CODE = 'Use WP_HTML_Processor::create_fragment() instead of calling the class constructor directly.';
}
inner blocks were found.
* }
* }
* @return string String of rendered HTML.
*/
function serialize_blocks( $blocks ) {
	// Serialize every block on its own, then glue the pieces together with no separator.
	$serialized_blocks = array_map( 'serialize_block', $blocks );

	return implode( '', $serialized_blocks );
}
/**
* Traverses a parsed block tree and applies callbacks before and after serializing it.
*
* Recursively traverses the block and its inner blocks and applies the two callbacks provided as
* arguments, the first one before serializing the block, and the second one after serializing it.
* If either callback returns a string value, it will be prepended and appended to the serialized
* block markup, respectively.
*
* The callbacks will receive a reference to the current block as their first argument, so that they
* can also modify it, and the current block's parent block as second argument. Finally, the
* `$pre_callback` receives the previous block, whereas the `$post_callback` receives
* the next block as third argument.
*
* Serialized blocks are returned including comment delimiters, and with all attributes serialized.
*
* This function should be used when there is a need to modify the saved block, or to inject markup
* into the return value. Prefer `serialize_block` when preparing a block to be saved to post content.
*
* This function is meant for internal use only.
*
* @since 6.4.0
* @access private
*
* @see serialize_block()
*
* @param array $block An associative array of a single parsed block object. See WP_Block_Parser_Block.
* @param callable $pre_callback Callback to run on each block in the tree before it is traversed and serialized.
* It is called with the following arguments: &$block, $parent_block, $previous_block.
* Its string return value will be prepended to the serialized block markup.
* @param callable $post_callback Callback to run on each block in the tree after it is traversed and serialized.
* It is called with the following arguments: &$block, $parent_block, $next_block.
* Its string return value will be appended to the serialized block markup.
* @return string Serialized block markup.
*/
function traverse_and_serialize_block( $block, $pre_callback = null, $post_callback = null ) {
	$markup      = '';
	$inner_index = 0;

	foreach ( $block['innerContent'] as $piece ) {
		// String pieces are literal markup; anything else stands in for the next inner block.
		if ( is_string( $piece ) ) {
			$markup .= $piece;
			continue;
		}

		$inner_block = $block['innerBlocks'][ $inner_index ];

		if ( is_callable( $pre_callback ) ) {
			$previous_sibling = null;
			if ( 0 !== $inner_index ) {
				$previous_sibling = $block['innerBlocks'][ $inner_index - 1 ];
			}

			$markup .= call_user_func_array(
				$pre_callback,
				array( &$inner_block, &$block, $previous_sibling )
			);
		}

		// The post callback runs before the inner block is serialized; its markup is appended afterwards.
		if ( is_callable( $post_callback ) ) {
			$next_sibling = null;
			if ( count( $block['innerBlocks'] ) - 1 !== $inner_index ) {
				$next_sibling = $block['innerBlocks'][ $inner_index + 1 ];
			}

			$trailing_markup = call_user_func_array(
				$post_callback,
				array( &$inner_block, &$block, $next_sibling )
			);
		}

		$markup .= traverse_and_serialize_block( $inner_block, $pre_callback, $post_callback );
		$markup .= $trailing_markup ?? '';
		++$inner_index;
	}

	// Non-array attributes are replaced with an empty array before the delimiters are built.
	if ( ! is_array( $block['attrs'] ) ) {
		$block['attrs'] = array();
	}

	return get_comment_delimited_block_content(
		$block['blockName'],
		$block['attrs'],
		$markup
	);
}
/**
* Replaces patterns in a block tree with their content.
*
* @since 6.6.0
*
* @param array $blocks An array of blocks.
*
* @return array An array of blocks with patterns replaced by their content.
*/
function resolve_pattern_blocks( $blocks ) {
/*
 * Shared across recursive calls. While the inner blocks of a non-pattern
 * block are being resolved this holds that parent's `innerContent`; it is
 * reset to null while a pattern's own content is being resolved.
 */
static $inner_content;
// Keep track of seen references to avoid infinite loops.
static $seen_refs = array();
$i = 0;
// count() is re-evaluated on every pass because $blocks grows and shrinks as patterns are spliced.
while ( $i < count( $blocks ) ) {
if ( 'core/pattern' === $blocks[ $i ]['blockName'] ) {
$attrs = $blocks[ $i ]['attrs'];
// Pattern blocks without a slug are left in place untouched.
if ( empty( $attrs['slug'] ) ) {
++$i;
continue;
}
$slug = $attrs['slug'];
if ( isset( $seen_refs[ $slug ] ) ) {
// Skip recursive patterns.
// The block is removed and $i is not advanced, so the next block is examined at the same index.
array_splice( $blocks, $i, 1 );
continue;
}
$registry = WP_Block_Patterns_Registry::get_instance();
$pattern = $registry->get_registered( $slug );
// Skip unknown patterns.
if ( ! $pattern ) {
++$i;
continue;
}
$blocks_to_insert = parse_blocks( $pattern['content'] );
// Mark the slug as in progress only for the duration of the nested resolution.
$seen_refs[ $slug ] = true;
// Resolve the pattern's own blocks with no parent inner content, then restore the saved value.
$prev_inner_content = $inner_content;
$inner_content = null;
$blocks_to_insert = resolve_pattern_blocks( $blocks_to_insert );
$inner_content = $prev_inner_content;
unset( $seen_refs[ $slug ] );
// Replace the pattern block with the blocks it resolved to.
array_splice( $blocks, $i, 1, $blocks_to_insert );
// If we have inner content, we need to insert nulls in the
// inner content array, otherwise serialize_blocks will skip
// blocks.
if ( $inner_content ) {
// NOTE(review): assumes the $i-th null placeholder corresponds to the block at index $i; confirm against callers.
$null_indices = array_keys( $inner_content, null, true );
$content_index = $null_indices[ $i ];
$nulls = array_fill( 0, count( $blocks_to_insert ), null );
array_splice( $inner_content, $content_index, 1, $nulls );
}
// Skip inserted blocks.
$i += count( $blocks_to_insert );
} else {
if ( ! empty( $blocks[ $i ]['innerBlocks'] ) ) {
// Expose this block's innerContent to the recursive call, then store the possibly modified copy back.
$prev_inner_content = $inner_content;
$inner_content = $blocks[ $i ]['innerContent'];
$blocks[ $i ]['innerBlocks'] = resolve_pattern_blocks(
$blocks[ $i ]['innerBlocks']
);
$blocks[ $i ]['innerContent'] = $inner_content;
$inner_content = $prev_inner_content;
}
++$i;
}
}
return $blocks;
}
/**
* Given an array of parsed block trees, applies callbacks before and after serializing them and
* returns their concatenated output.
*
* Recursively traverses the blocks and their inner blocks and applies the two callbacks provided as
* arguments, the first one before serializing a block, and the second one after serializing.
* If either callback returns a string value, it will be prepended and appended to the serialized
* block markup, respectively.
*
* The callbacks will receive a reference to the current block as their first argument, so that they
* can also modify it, and the current block's parent block as second argument. Finally, the
* `$pre_callback` receives the previous block, whereas the `$post_callback` receives
* the next block as third argument.
*
* Serialized blocks are returned including comment delimiters, and with all attributes serialized.
*
* This function should be used when there is a need to modify the saved blocks, or to inject markup
* into the return value. Prefer `serialize_blocks` when preparing blocks to be saved to post content.
*
* This function is meant for internal use only.
*
* @since 6.4.0
* @access private
*
* @see serialize_blocks()
*
* @param array[] $blocks An array of parsed blocks. See WP_Block_Parser_Block.
* @param callable $pre_callback Callback to run on each block in the tree before it is traversed and serialized.
* It is called with the following arguments: &$block, $parent_block, $previous_block.
* Its string return value will be prepended to the serialized block markup.
* @param callable $post_callback Callback to run on each block in the tree after it is traversed and serialized.
* It is called with the following arguments: &$block, $parent_block, $next_block.
* Its string return value will be appended to the serialized block markup.
* @return string Serialized block markup.
*/
function traverse_and_serialize_blocks( $blocks, $pre_callback = null, $post_callback = null ) {
	// At the top level, there is no parent block to pass to the callbacks; yet the callbacks expect a reference.
	$parent_block = null;

	$has_pre_callback  = is_callable( $pre_callback );
	$has_post_callback = is_callable( $post_callback );

	$output = '';

	foreach ( $blocks as $position => $current_block ) {
		if ( $has_pre_callback ) {
			$previous_block = null;
			if ( 0 !== $position ) {
				$previous_block = $blocks[ $position - 1 ];
			}

			$output .= call_user_func_array(
				$pre_callback,
				array( &$current_block, &$parent_block, $previous_block )
			);
		}

		// The post callback runs before the block is serialized; its markup is appended afterwards.
		if ( $has_post_callback ) {
			$following_block = null;
			if ( count( $blocks ) - 1 !== $position ) {
				$following_block = $blocks[ $position + 1 ];
			}

			$trailing_markup = call_user_func_array(
				$post_callback,
				array( &$current_block, &$parent_block, $following_block )
			);
		}

		$output .= traverse_and_serialize_block( $current_block, $pre_callback, $post_callback );
		$output .= $trailing_markup ?? '';
	}

	return $output;
}
/**
* Filters and sanitizes block content to remove non-allowable HTML
* from parsed block attribute values.
*
* @since 5.3.1
*
* @param string $text Text that may contain block content.
* @param array[]|string $allowed_html Optional. An array of allowed HTML elements and attributes,
* or a context name such as 'post'. See wp_kses_allowed_html()
* for the list of accepted context names. Default 'post'.
* @param string[] $allowed_protocols Optional. Array of allowed URL protocols.
* Defaults to the result of wp_allowed_protocols().
* @return string The filtered and sanitized content result.
*/
function filter_block_content( $text, $allowed_html = 'post', $allowed_protocols = array() ) {
// Accumulates the re-serialized markup of every filtered block.
$result = '';
// Only run the regex replacement when the needle is present in the text.
if ( str_contains( $text, '' ) ) {
$text = preg_replace_callback( '%%', '_filter_block_content_callback', $text );
}
$blocks = parse_blocks( $text );
// Sanitize each top-level block, then serialize it back to markup.
foreach ( $blocks as $block ) {
$block = filter_block_kses( $block, $allowed_html, $allowed_protocols );
$result .= serialize_block( $block );
}
return $result;
}
/**
* Callback used for regular expression replacement in filter_block_content().
*
* @since 6.2.1
* @access private
*
* @param array $matches Array of preg_replace_callback matches.
* @return string Replacement string.
*/
function _filter_block_content_callback( $matches ) {
// Replacement text substituted for each match found by filter_block_content().
return '';
}
/**
 * Sanitizes the attributes of a parsed block, and of every block nested
 * inside it, so that no non-allowable HTML remains in attribute values.
 *
 * @since 5.3.1
 *
 * @param WP_Block_Parser_Block $block             The parsed block object.
 * @param array[]|string        $allowed_html      An array of allowed HTML elements and attributes,
 *                                                 or a context name such as 'post'. See wp_kses_allowed_html()
 *                                                 for the list of accepted context names.
 * @param string[]              $allowed_protocols Optional. Array of allowed URL protocols.
 *                                                 Defaults to the result of wp_allowed_protocols().
 * @return array The filtered and sanitized block object result.
 */
function filter_block_kses( $block, $allowed_html, $allowed_protocols = array() ) {
	// The block itself is handed over as context for block-specific attribute rules.
	$block['attrs'] = filter_block_kses_value( $block['attrs'], $allowed_html, $allowed_protocols, $block );

	if ( ! is_array( $block['innerBlocks'] ) ) {
		return $block;
	}

	// Recurse into nested blocks, replacing each one with its sanitized form.
	foreach ( array_keys( $block['innerBlocks'] ) as $index ) {
		$block['innerBlocks'][ $index ] = filter_block_kses(
			$block['innerBlocks'][ $index ],
			$allowed_html,
			$allowed_protocols
		);
	}

	return $block;
}
/**
 * Recursively sanitizes a parsed block attribute value.
 *
 * Strings are passed through wp_kses(). Arrays are walked so that both their
 * keys and their values are sanitized. Any other type is returned untouched.
 *
 * @since 5.3.1
 * @since 6.5.5 Added the `$block_context` parameter.
 *
 * @param string[]|string $value             The attribute value to filter.
 * @param array[]|string  $allowed_html      An array of allowed HTML elements and attributes,
 *                                           or a context name such as 'post'. See wp_kses_allowed_html()
 *                                           for the list of accepted context names.
 * @param string[]        $allowed_protocols Optional. Array of allowed URL protocols.
 *                                           Defaults to the result of wp_allowed_protocols().
 * @param array           $block_context     Optional. The block the attribute belongs to, in parsed block array format.
 * @return string[]|string The filtered and sanitized result.
 */
function filter_block_kses_value( $value, $allowed_html, $allowed_protocols = array(), $block_context = null ) {
	if ( is_string( $value ) ) {
		return wp_kses( $value, $allowed_html, $allowed_protocols );
	}

	if ( ! is_array( $value ) ) {
		return $value;
	}

	// Template Part blocks get an extra pass over their attributes.
	$is_template_part = isset( $block_context['blockName'] ) && 'core/template-part' === $block_context['blockName'];

	foreach ( $value as $key => $inner_value ) {
		$clean_key   = filter_block_kses_value( $key, $allowed_html, $allowed_protocols, $block_context );
		$clean_value = filter_block_kses_value( $inner_value, $allowed_html, $allowed_protocols, $block_context );

		if ( $is_template_part ) {
			$clean_value = filter_block_core_template_part_attributes( $clean_value, $clean_key, $allowed_html );
		}

		// When sanitizing altered the key, drop the entry stored under the old key.
		if ( $clean_key !== $key ) {
			unset( $value[ $key ] );
		}

		$value[ $clean_key ] = $clean_value;
	}

	return $value;
}
/**
 * Sanitizes the value of the Template Part block's `tagName` attribute.
 *
 * Attributes other than `tagName`, and empty values, are returned unchanged.
 * A `tagName` is kept only when it names an element present in the allowed
 * HTML list; otherwise an empty string is returned.
 *
 * @since 6.5.5
 *
 * @param string         $attribute_value The attribute value to filter.
 * @param string         $attribute_name  The attribute name.
 * @param array[]|string $allowed_html    An array of allowed HTML elements and attributes,
 *                                        or a context name such as 'post'. See wp_kses_allowed_html()
 *                                        for the list of accepted context names.
 * @return string The sanitized attribute value.
 */
function filter_block_core_template_part_attributes( $attribute_value, $attribute_name, $allowed_html ) {
	if ( empty( $attribute_value ) || 'tagName' !== $attribute_name ) {
		return $attribute_value;
	}

	/*
	 * Block attributes are decoded from JSON, so `tagName` may arrive as an
	 * array. Using a non-scalar as an array offset inside isset() throws a
	 * TypeError on PHP 8, and such a value can never be a valid tag name.
	 */
	if ( ! is_scalar( $attribute_value ) ) {
		return '';
	}

	// Resolve a context name such as 'post' into the actual list of allowed elements.
	if ( ! is_array( $allowed_html ) ) {
		$allowed_html = wp_kses_allowed_html( $allowed_html );
	}

	return isset( $allowed_html[ $attribute_value ] ) ? $attribute_value : '';
}
/**
 * Parses blocks out of a content string, and renders those appropriate for the excerpt.
 *
 * An excerpt is meant to be a short piece of text drawn from the post content,
 * so only block types that are likely to hold such text are rendered. Allowed
 * wrapper blocks contribute through their inner blocks instead.
 *
 * @since 5.0.0
 *
 * @param string $content The content to parse.
 * @return string The parsed and filtered content.
 */
function excerpt_remove_blocks( $content ) {
	if ( ! has_blocks( $content ) ) {
		return $content;
	}

	$allowed_inner_blocks = array(
		// Classic blocks have their blockName set to null.
		null,
		'core/freeform',
		'core/heading',
		'core/html',
		'core/list',
		'core/media-text',
		'core/paragraph',
		'core/preformatted',
		'core/pullquote',
		'core/quote',
		'core/table',
		'core/verse',
	);

	$allowed_wrapper_blocks = array(
		'core/columns',
		'core/column',
		'core/group',
	);

	/**
	 * Filters the list of blocks that can be used as wrapper blocks, allowing
	 * excerpts to be generated from the `innerBlocks` of these wrappers.
	 *
	 * @since 5.8.0
	 *
	 * @param string[] $allowed_wrapper_blocks The list of names of allowed wrapper blocks.
	 */
	$allowed_wrapper_blocks = apply_filters( 'excerpt_allowed_wrapper_blocks', $allowed_wrapper_blocks );

	$allowed_blocks = array_merge( $allowed_inner_blocks, $allowed_wrapper_blocks );

	/**
	 * Filters the list of blocks that can contribute to the excerpt.
	 *
	 * If a dynamic block is added to this list, it must not generate another
	 * excerpt, as this will cause an infinite loop to occur.
	 *
	 * @since 5.0.0
	 *
	 * @param string[] $allowed_blocks The list of names of allowed blocks.
	 */
	$allowed_blocks = apply_filters( 'excerpt_allowed_blocks', $allowed_blocks );

	$output = '';

	foreach ( parse_blocks( $content ) as $block ) {
		$block_name = $block['blockName'];

		if ( ! in_array( $block_name, $allowed_blocks, true ) ) {
			continue;
		}

		if ( ! empty( $block['innerBlocks'] ) ) {
			// Wrapper blocks are never rendered themselves; only their allowed descendants are.
			if ( in_array( $block_name, $allowed_wrapper_blocks, true ) ) {
				$output .= _excerpt_render_inner_blocks( $block, $allowed_blocks );
				continue;
			}

			// Skip the whole block when any child is disallowed or has children of its own.
			foreach ( $block['innerBlocks'] as $child ) {
				$child_allowed = in_array( $child['blockName'], $allowed_inner_blocks, true );

				if ( ! $child_allowed || ! empty( $child['innerBlocks'] ) ) {
					continue 2;
				}
			}
		}

		$output .= render_block( $block );
	}

	return $output;
}
/**
 * Parses footnotes markup out of a content string,
 * and renders those appropriate for the excerpt.
 *
 * Footnote anchors (a `<sup data-fn="…">` element wrapping a numbered link)
 * are removed entirely; all other content, including ordinary numbers in the
 * text, is left as is.
 *
 * @since 6.3.0
 *
 * @param string $content The content to parse.
 * @return string The parsed and filtered content.
 */
function excerpt_remove_footnotes( $content ) {
	// Cheap substring check so content without footnotes skips the regular expression.
	if ( ! str_contains( $content, 'data-fn=' ) ) {
		return $content;
	}

	return preg_replace(
		'_<sup data-fn="[^"]+" class="[^"]+">\s*<a href="[^"]+" id="[^"]+">\d+</a>\s*</sup>_',
		'',
		$content
	);
}
/**
 * Renders the allowed descendants of a wrapper block for use in an excerpt.
 *
 * Inner blocks whose name is not in the allowed list are skipped together with
 * everything nested inside them. Allowed inner blocks without children are
 * rendered; allowed inner blocks with children are descended into.
 *
 * @since 5.8.0
 * @access private
 *
 * @param array $parsed_block   The parsed block.
 * @param array $allowed_blocks The list of allowed inner blocks.
 * @return string The rendered inner blocks.
 */
function _excerpt_render_inner_blocks( $parsed_block, $allowed_blocks ) {
	$rendered = '';

	foreach ( $parsed_block['innerBlocks'] as $child ) {
		if ( ! in_array( $child['blockName'], $allowed_blocks, true ) ) {
			continue;
		}

		$rendered .= empty( $child['innerBlocks'] )
			? render_block( $child )
			: _excerpt_render_inner_blocks( $child, $allowed_blocks );
	}

	return $rendered;
}
/**
 * Renders a single block into a HTML string.
 *
 * The block passes through the {@see 'pre_render_block'}, {@see 'render_block_data'}
 * and {@see 'render_block_context'} filters before a WP_Block instance is created
 * from it and rendered.
 *
 * @since 5.0.0
 *
 * @global WP_Post $post The post to edit.
 *
 * @param array $parsed_block {
 *     An associative array of the block being rendered. See WP_Block_Parser_Block.
 *
 *     @type string   $blockName    Name of block.
 *     @type array    $attrs        Attributes from block comment delimiters.
 *     @type array[]  $innerBlocks  List of inner blocks. An array of arrays that
 *                                  have the same structure as this one.
 *     @type string   $innerHTML    HTML from inside block comment delimiters.
 *     @type array    $innerContent List of string fragments and null markers where
 *                                  inner blocks were found.
 * }
 * @return string String of rendered HTML.
 */
function render_block( $parsed_block ) {
	global $post;

	// This function always passes null as the parent block to the filters below.
	$parent_block = null;

	/**
	 * Allows render_block() to be short-circuited, by returning a non-null value.
	 *
	 * @since 5.1.0
	 * @since 5.9.0 The `$parent_block` parameter was added.
	 *
	 * @param string|null   $pre_render   The pre-rendered content. Default null.
	 * @param array         $parsed_block {
	 *     An associative array of the block being rendered. See WP_Block_Parser_Block.
	 *
	 *     @type string   $blockName    Name of block.
	 *     @type array    $attrs        Attributes from block comment delimiters.
	 *     @type array[]  $innerBlocks  List of inner blocks. An array of arrays that
	 *                                  have the same structure as this one.
	 *     @type string   $innerHTML    HTML from inside block comment delimiters.
	 *     @type array    $innerContent List of string fragments and null markers where
	 *                                  inner blocks were found.
	 * }
	 * @param WP_Block|null $parent_block If this is a nested block, a reference to the parent block.
	 */
	$pre_render = apply_filters( 'pre_render_block', null, $parsed_block, $parent_block );
	if ( null !== $pre_render ) {
		return $pre_render;
	}

	// Keep an untouched copy so filters can compare against the source data.
	$source_block = $parsed_block;

	/**
	 * Filters the block being rendered in render_block(), before it's processed.
	 *
	 * @since 5.1.0
	 * @since 5.9.0 The `$parent_block` parameter was added.
	 *
	 * @param array         $parsed_block {
	 *     An associative array of the block being rendered. See WP_Block_Parser_Block.
	 *
	 *     @type string   $blockName    Name of block.
	 *     @type array    $attrs        Attributes from block comment delimiters.
	 *     @type array[]  $innerBlocks  List of inner blocks. An array of arrays that
	 *                                  have the same structure as this one.
	 *     @type string   $innerHTML    HTML from inside block comment delimiters.
	 *     @type array    $innerContent List of string fragments and null markers where
	 *                                  inner blocks were found.
	 * }
	 * @param array         $source_block {
	 *     An un-modified copy of `$parsed_block`, as it appeared in the source content.
	 *     See WP_Block_Parser_Block.
	 *
	 *     @type string   $blockName    Name of block.
	 *     @type array    $attrs        Attributes from block comment delimiters.
	 *     @type array[]  $innerBlocks  List of inner blocks. An array of arrays that
	 *                                  have the same structure as this one.
	 *     @type string   $innerHTML    HTML from inside block comment delimiters.
	 *     @type array    $innerContent List of string fragments and null markers where
	 *                                  inner blocks were found.
	 * }
	 * @param WP_Block|null $parent_block If this is a nested block, a reference to the parent block.
	 */
	$parsed_block = apply_filters( 'render_block_data', $parsed_block, $source_block, $parent_block );

	$context = array();

	if ( $post instanceof WP_Post ) {
		/*
		 * The `postType` context is largely unnecessary server-side, since the ID
		 * is usually sufficient on its own. That being said, since a block's
		 * manifest is expected to be shared between the server and the client,
		 * it should be included to consistently fulfill the expectation.
		 */
		$context = array(
			'postId'   => $post->ID,
			'postType' => $post->post_type,
		);
	}

	/**
	 * Filters the default context provided to a rendered block.
	 *
	 * @since 5.5.0
	 * @since 5.9.0 The `$parent_block` parameter was added.
	 *
	 * @param array         $context      Default context.
	 * @param array         $parsed_block {
	 *     An associative array of the block being rendered. See WP_Block_Parser_Block.
	 *
	 *     @type string   $blockName    Name of block.
	 *     @type array    $attrs        Attributes from block comment delimiters.
	 *     @type array[]  $innerBlocks  List of inner blocks. An array of arrays that
	 *                                  have the same structure as this one.
	 *     @type string   $innerHTML    HTML from inside block comment delimiters.
	 *     @type array    $innerContent List of string fragments and null markers where
	 *                                  inner blocks were found.
	 * }
	 * @param WP_Block|null $parent_block If this is a nested block, a reference to the parent block.
	 */
	$context = apply_filters( 'render_block_context', $context, $parsed_block, $parent_block );

	return ( new WP_Block( $parsed_block, $context ) )->render();
}
/**
 * Parses blocks out of a content string.
 *
 * The parser class is chosen through the {@see 'block_parser_class'} filter,
 * instantiated without arguments, and asked to parse the content.
 *
 * @since 5.0.0
 *
 * @param string $content Post content.
 * @return array[] {
 *     Array of block structures.
 *
 *     @type array ...$0 {
 *         An associative array of a single parsed block object. See WP_Block_Parser_Block.
 *
 *         @type string   $blockName    Name of block.
 *         @type array    $attrs        Attributes from block comment delimiters.
 *         @type array[]  $innerBlocks  List of inner blocks. An array of arrays that
 *                                      have the same structure as this one.
 *         @type string   $innerHTML    HTML from inside block comment delimiters.
 *         @type array    $innerContent List of string fragments and null markers where
 *                                      inner blocks were found.
 *     }
 * }
 */
function parse_blocks( $content ) {
	/**
	 * Filter to allow plugins to replace the server-side block parser.
	 *
	 * @since 5.0.0
	 *
	 * @param string $parser_class Name of block parser class.
	 */
	$parser_class = apply_filters( 'block_parser_class', 'WP_Block_Parser' );

	return ( new $parser_class() )->parse( $content );
}
/**
 * Parses dynamic blocks out of `post_content` and re-renders them.
 *
 * When called while `the_content` is being filtered and the content contains
 * blocks, wpautop() is temporarily unhooked from `the_content` and scheduled
 * to be restored by _restore_wpautop_hook().
 *
 * @since 5.0.0
 *
 * @param string $content Post content.
 * @return string Updated post content.
 */
function do_blocks( $content ) {
	$blocks   = parse_blocks( $content );
	$total    = count( $blocks );
	$rendered = '';

	/*
	 * Top-level blocks are rendered independently of one another, so in
	 * principle each could be parsed, rendered and discarded in turn.
	 * parse_blocks() does not work that way: it has to parse the whole
	 * document up front.
	 *
	 * Peak memory use can still be lowered, because render_block() is often
	 * memory-heavy. Overwriting each parsed block with `null` as soon as it
	 * has been rendered lets the memory allocated for that render be freed
	 * and reused for the next block. Without this, that memory was retained
	 * and certain posts ran into out-of-memory crashes.
	 */
	$index = 0;
	while ( $index < $total ) {
		$rendered        .= render_block( $blocks[ $index ] );
		$blocks[ $index ] = null;
		++$index;
	}

	// If there are blocks in this content, we shouldn't run wpautop() on it later.
	$wpautop_priority = has_filter( 'the_content', 'wpautop' );
	$should_unhook    = false !== $wpautop_priority && doing_filter( 'the_content' ) && has_blocks( $content );

	if ( $should_unhook ) {
		remove_filter( 'the_content', 'wpautop', $wpautop_priority );
		add_filter( 'the_content', '_restore_wpautop_hook', $wpautop_priority + 1 );
	}

	return $rendered;
}
/**
 * Re-adds wpautop() to the `the_content` filter after do_blocks() removed it,
 * so that subsequent `the_content` usage runs it again.
 *
 * wpautop() is hooked one priority below the one this callback is registered
 * at, and this callback then unhooks itself.
 *
 * @since 5.0.0
 * @access private
 *
 * @param string $content The post content running through this filter.
 * @return string The unmodified content.
 */
function _restore_wpautop_hook( $content ) {
	$own_priority = has_filter( 'the_content', '_restore_wpautop_hook' );

	add_filter( 'the_content', 'wpautop', $own_priority - 1 );
	remove_filter( 'the_content', '_restore_wpautop_hook', $own_priority );

	return $content;
}
/**
 * Returns the current version of the block format that the content string is using.
 *
 * Content for which has_blocks() reports blocks is version 1; anything else is version 0.
 *
 * @since 5.0.0
 *
 * @param string $content Content to test.
 * @return int The block format version is 1 if the content contains one or more blocks, 0 otherwise.
 */
function block_version( $content ) {
	if ( has_blocks( $content ) ) {
		return 1;
	}

	return 0;
}
/**
 * Registers a new block style.
 *
 * Thin wrapper that forwards to the register() method of the shared
 * WP_Block_Styles_Registry instance.
 *
 * @since 5.3.0
 * @since 6.6.0 Added support for registering styles for multiple block types.
 *
 * @link https://developer.wordpress.org/block-editor/reference-guides/block-api/block-styles/
 *
 * @param string|string[] $block_name       Block type name including namespace or array of namespaced block type names.
 * @param array           $style_properties Array containing the properties of the style name, label,
 *                                          style_handle (name of the stylesheet to be enqueued),
 *                                          inline_style (string containing the CSS to be added),
 *                                          style_data (theme.json-like array to generate CSS from).
 *                                          See WP_Block_Styles_Registry::register().
 * @return bool True if the block style was registered with success and false otherwise.
 */
function register_block_style( $block_name, $style_properties ) {
	$registry = WP_Block_Styles_Registry::get_instance();

	return $registry->register( $block_name, $style_properties );
}
/**
 * Unregisters a block style.
 *
 * Thin wrapper that forwards to the unregister() method of the shared
 * WP_Block_Styles_Registry instance.
 *
 * @since 5.3.0
 *
 * @param string $block_name       Block type name including namespace.
 * @param string $block_style_name Block style name.
 * @return bool True if the block style was unregistered with success and false otherwise.
 */
function unregister_block_style( $block_name, $block_style_name ) {
	$registry = WP_Block_Styles_Registry::get_instance();

	return $registry->unregister( $block_name, $block_style_name );
}
/**
 * Checks whether the current block type supports the feature requested.
 *
 * The feature counts as supported when the resolved support value is exactly
 * `true` or is an array. When `$block_type` is not a WP_Block_Type, the
 * fallback value alone decides the result.
 *
 * @since 5.8.0
 * @since 6.4.0 The `$feature` parameter now supports a string.
 *
 * @param WP_Block_Type $block_type    Block type to check for support.
 * @param string|array  $feature       Feature slug, or path to a specific feature to check support for.
 * @param mixed         $default_value Optional. Fallback value for feature support. Default false.
 * @return bool Whether the feature is supported.
 */
function block_has_support( $block_type, $feature, $default_value = false ) {
	$support = $default_value;

	if ( $block_type instanceof WP_Block_Type ) {
		// A one-element path is treated like a plain feature slug.
		$lookup = ( is_array( $feature ) && 1 === count( $feature ) ) ? $feature[0] : $feature;

		if ( is_array( $lookup ) ) {
			$support = _wp_array_get( $block_type->supports, $lookup, $default_value );
		} elseif ( isset( $block_type->supports[ $lookup ] ) ) {
			$support = $block_type->supports[ $lookup ];
		}
	}

	if ( true === $support ) {
		return true;
	}

	return is_array( $support );
}
/**
 * Converts typography keys declared under `supports.*` to `supports.typography.*`.
 *
 * Displays a `_doing_it_wrong()` notice when a block using the older format is detected.
 * Metadata without a `supports` entry is returned unchanged.
 *
 * @since 5.8.0
 *
 * @param array $metadata Metadata for registering a block type.
 * @return array Filtered metadata for registering a block type.
 */
function wp_migrate_old_typography_shape( $metadata ) {
	if ( ! isset( $metadata['supports'] ) ) {
		return $metadata;
	}

	$typography_keys = array(
		'__experimentalFontFamily',
		'__experimentalFontStyle',
		'__experimentalFontWeight',
		'__experimentalLetterSpacing',
		'__experimentalTextDecoration',
		'__experimentalTextTransform',
		'fontSize',
		'lineHeight',
	);

	foreach ( $typography_keys as $typography_key ) {
		// Keys that are absent (or explicitly null) need no migration.
		if ( ! isset( $metadata['supports'][ $typography_key ] ) ) {
			continue;
		}

		$support_for_key = $metadata['supports'][ $typography_key ];

		/*
		 * The placeholders are wrapped in <code> rather than padded with
		 * whitespace, so the sentence does not end up with doubled spaces
		 * or a space before the full stop.
		 */
		_doing_it_wrong(
			'register_block_type_from_metadata()',
			sprintf(
				/* translators: 1: Block type, 2: Typography supports key, e.g: fontSize, lineHeight, etc. 3: block.json, 4: Old metadata key, 5: New metadata key. */
				__( 'Block "%1$s" is declaring %2$s support in %3$s file under %4$s. %2$s support is now declared under %5$s.' ),
				$metadata['name'],
				"<code>$typography_key</code>",
				'<code>block.json</code>',
				"<code>supports.$typography_key</code>",
				"<code>supports.typography.$typography_key</code>"
			),
			'5.8.0'
		);

		// Move the value to its new home and drop the legacy top-level key.
		_wp_array_set( $metadata['supports'], array( 'typography', $typography_key ), $support_for_key );
		unset( $metadata['supports'][ $typography_key ] );
	}

	return $metadata;
}
/**
 * Helper function that constructs a WP_Query args array from
 * a `Query` block properties.
 *
 * It's used in Query Loop, Query Pagination Numbers and Query Pagination Next blocks.
 *
 * The arguments start from a set of defaults, are adjusted by whatever is
 * present under `$block->context['query']`, and are finally passed through
 * the {@see 'query_loop_block_query_vars'} filter.
 *
 * @since 5.8.0
 * @since 6.1.0 Added `query_loop_block_query_vars` filter and `parents` support in query.
 * @since 6.7.0 Added support for the `format` property in query.
 *
 * @param WP_Block $block Block instance.
 * @param int      $page  Current query's page.
 *
 * @return array Returns the constructed WP_Query arguments.
 */
function build_query_vars_from_query_block( $block, $page ) {
	$query = array(
		'post_type'    => 'post',
		'order'        => 'DESC',
		'orderby'      => 'date',
		'post__not_in' => array(),
		'tax_query'    => array(),
	);

	if ( isset( $block->context['query'] ) ) {
		if ( ! empty( $block->context['query']['postType'] ) ) {
			$post_type_param = $block->context['query']['postType'];
			if ( is_post_type_viewable( $post_type_param ) ) {
				$query['post_type'] = $post_type_param;
			}
		}

		if ( ! empty( $block->context['query']['sticky'] ) ) {
			/*
			 * The option can come back as a non-array (get_option() yields false
			 * when the option is missing). Normalize it so array_merge() below
			 * never receives a non-array argument, which is a TypeError on PHP 8.
			 */
			$sticky = get_option( 'sticky_posts' );
			if ( ! is_array( $sticky ) ) {
				$sticky = array();
			}

			if ( 'only' === $block->context['query']['sticky'] ) {
				/*
				 * Passing an empty array to post__in will return have_posts() as true (and all posts will be returned).
				 * Logic should be used before hand to determine if WP_Query should be used in the event that the array
				 * being passed to post__in is empty.
				 *
				 * @see https://core.trac.wordpress.org/ticket/28099
				 */
				$query['post__in']            = ! empty( $sticky ) ? $sticky : array( 0 );
				$query['ignore_sticky_posts'] = 1;
			} elseif ( 'exclude' === $block->context['query']['sticky'] ) {
				$query['post__not_in'] = array_merge( $query['post__not_in'], $sticky );
			} elseif ( 'ignore' === $block->context['query']['sticky'] ) {
				$query['ignore_sticky_posts'] = 1;
			}
		}

		if ( ! empty( $block->context['query']['exclude'] ) ) {
			// Cast to integers and drop anything that is not a positive/negative non-zero ID.
			$excluded_post_ids     = array_map( 'intval', $block->context['query']['exclude'] );
			$excluded_post_ids     = array_filter( $excluded_post_ids );
			$query['post__not_in'] = array_merge( $query['post__not_in'], $excluded_post_ids );
		}

		if (
			isset( $block->context['query']['perPage'] ) &&
			is_numeric( $block->context['query']['perPage'] )
		) {
			$per_page = absint( $block->context['query']['perPage'] );
			$offset   = 0;

			if (
				isset( $block->context['query']['offset'] ) &&
				is_numeric( $block->context['query']['offset'] )
			) {
				$offset = absint( $block->context['query']['offset'] );
			}

			// The block's own offset is added on top of the offset implied by the page number.
			$query['offset']         = ( $per_page * ( $page - 1 ) ) + $offset;
			$query['posts_per_page'] = $per_page;
		}

		// Migrate `categoryIds` and `tagIds` to `tax_query` for backwards compatibility.
		if ( ! empty( $block->context['query']['categoryIds'] ) || ! empty( $block->context['query']['tagIds'] ) ) {
			$tax_query_back_compat = array();

			if ( ! empty( $block->context['query']['categoryIds'] ) ) {
				$tax_query_back_compat[] = array(
					'taxonomy'         => 'category',
					'terms'            => array_filter( array_map( 'intval', $block->context['query']['categoryIds'] ) ),
					'include_children' => false,
				);
			}

			if ( ! empty( $block->context['query']['tagIds'] ) ) {
				$tax_query_back_compat[] = array(
					'taxonomy'         => 'post_tag',
					'terms'            => array_filter( array_map( 'intval', $block->context['query']['tagIds'] ) ),
					'include_children' => false,
				);
			}

			$query['tax_query'] = array_merge( $query['tax_query'], $tax_query_back_compat );
		}

		if ( ! empty( $block->context['query']['taxQuery'] ) ) {
			$tax_query = array();

			foreach ( $block->context['query']['taxQuery'] as $taxonomy => $terms ) {
				if ( is_taxonomy_viewable( $taxonomy ) && ! empty( $terms ) ) {
					$tax_query[] = array(
						'taxonomy'         => $taxonomy,
						'terms'            => array_filter( array_map( 'intval', $terms ) ),
						'include_children' => false,
					);
				}
			}

			$query['tax_query'] = array_merge( $query['tax_query'], $tax_query );
		}

		if ( ! empty( $block->context['query']['format'] ) && is_array( $block->context['query']['format'] ) ) {
			$formats = $block->context['query']['format'];

			/*
			 * Validate that the format is either `standard` or a supported post format.
			 * - First, add `standard` to the array of valid formats.
			 * - Then, remove any invalid formats.
			 */
			$valid_formats = array_merge( array( 'standard' ), get_post_format_slugs() );
			$formats       = array_intersect( $formats, $valid_formats );

			/*
			 * The relation needs to be set to `OR` since the request can contain
			 * two separate conditions. The user may be querying for items that have
			 * either the `standard` format or a specific format.
			 */
			$formats_query = array( 'relation' => 'OR' );

			/*
			 * The default post format, `standard`, is not stored in the database.
			 * If `standard` is part of the request, the query needs to exclude all post items that
			 * have a format assigned.
			 */
			if ( in_array( 'standard', $formats, true ) ) {
				$formats_query[] = array(
					'taxonomy' => 'post_format',
					'field'    => 'slug',
					'operator' => 'NOT EXISTS',
				);
				// Remove the `standard` format, since it cannot be queried.
				unset( $formats[ array_search( 'standard', $formats, true ) ] );
			}

			// Add any remaining formats to the formats query.
			if ( ! empty( $formats ) ) {
				// Add the `post-format-` prefix.
				$terms = array_map(
					static function ( $format ) {
						return "post-format-$format";
					},
					$formats
				);

				$formats_query[] = array(
					'taxonomy' => 'post_format',
					'field'    => 'slug',
					'terms'    => $terms,
					'operator' => 'IN',
				);
			}

			/*
			 * Add `$formats_query` to `$query`, as long as it contains more than one key:
			 * If `$formats_query` only contains the initial `relation` key, there are no valid formats to query,
			 * and the query should not be modified.
			 */
			if ( count( $formats_query ) > 1 ) {
				// Enable filtering by both post formats and other taxonomies by combining them with `AND`.
				if ( empty( $query['tax_query'] ) ) {
					$query['tax_query'] = $formats_query;
				} else {
					$query['tax_query'] = array(
						'relation' => 'AND',
						$query['tax_query'],
						$formats_query,
					);
				}
			}
		}

		if (
			isset( $block->context['query']['order'] ) &&
			in_array( strtoupper( $block->context['query']['order'] ), array( 'ASC', 'DESC' ), true )
		) {
			$query['order'] = strtoupper( $block->context['query']['order'] );
		}

		if ( isset( $block->context['query']['orderBy'] ) ) {
			$query['orderby'] = $block->context['query']['orderBy'];
		}

		if (
			isset( $block->context['query']['author'] )
		) {
			// Accepts a list of IDs, a comma-separated string of IDs, or a single positive integer ID.
			if ( is_array( $block->context['query']['author'] ) ) {
				$query['author__in'] = array_filter( array_map( 'intval', $block->context['query']['author'] ) );
			} elseif ( is_string( $block->context['query']['author'] ) ) {
				$query['author__in'] = array_filter( array_map( 'intval', explode( ',', $block->context['query']['author'] ) ) );
			} elseif ( is_int( $block->context['query']['author'] ) && $block->context['query']['author'] > 0 ) {
				$query['author'] = $block->context['query']['author'];
			}
		}

		if ( ! empty( $block->context['query']['search'] ) ) {
			$query['s'] = $block->context['query']['search'];
		}

		if ( ! empty( $block->context['query']['parents'] ) && is_post_type_hierarchical( $query['post_type'] ) ) {
			$query['post_parent__in'] = array_unique( array_map( 'intval', $block->context['query']['parents'] ) );
		}
	}

	/**
	 * Filters the arguments which will be passed to `WP_Query` for the Query Loop Block.
	 *
	 * Anything to this filter should be compatible with the `WP_Query` API to form
	 * the query context which will be passed down to the Query Loop Block's children.
	 * This can help, for example, to include additional settings or meta queries not
	 * directly supported by the core Query Loop Block, and extend its capabilities.
	 *
	 * Please note that this will only influence the query that will be rendered on the
	 * front-end. The editor preview is not affected by this filter. Also, worth noting
	 * that the editor preview uses the REST API, so, ideally, one should aim to provide
	 * attributes which are also compatible with the REST API, in order to be able to
	 * implement identical queries on both sides.
	 *
	 * @since 6.1.0
	 *
	 * @param array    $query Array containing parameters for `WP_Query` as parsed by the block context.
	 * @param WP_Block $block Block instance.
	 * @param int      $page  Current query's page.
	 */
	return apply_filters( 'query_loop_block_query_vars', $query, $block, $page );
}
/**
 * Helper function that returns the proper pagination arrow HTML for
 * `QueryPaginationNext` and `QueryPaginationPrevious` blocks based
 * on the provided `paginationArrow` from `QueryPagination` context.
 *
 * It's used in QueryPaginationNext and QueryPaginationPrevious blocks.
 *
 * @since 5.9.0
 *
 * @param WP_Block $block   Block instance.
 * @param bool     $is_next Flag for handling `next/previous` blocks.
 * @return string|null The pagination arrow HTML or null if there is none.
 */
function get_query_pagination_arrow( $block, $is_next ) {
	$arrow_map = array(
		'none'    => '',
		'arrow'   => array(
			'next'     => '→',
			'previous' => '←',
		),
		'chevron' => array(
			'next'     => '»',
			'previous' => '«',
		),
	);

	// Only string values can name an arrow style; anything else is treated as "no arrow".
	if ( empty( $block->context['paginationArrow'] ) || ! is_string( $block->context['paginationArrow'] ) ) {
		return null;
	}

	$arrow_attribute = $block->context['paginationArrow'];

	// Unknown styles and the empty `none` entry both produce no markup.
	if ( ! array_key_exists( $arrow_attribute, $arrow_map ) || empty( $arrow_map[ $arrow_attribute ] ) ) {
		return null;
	}

	$pagination_type = $is_next ? 'next' : 'previous';
	$arrow           = $arrow_map[ $arrow_attribute ][ $pagination_type ];
	$arrow_classes   = "wp-block-query-pagination-$pagination_type-arrow is-arrow-$arrow_attribute";

	// Both interpolated values come from the fixed map above, never from raw input.
	return "<span class='$arrow_classes' aria-hidden='true'>$arrow</span>";
}
/**
 * Helper function that constructs a comment query vars array from the passed
 * block properties.
 *
 * It's used with the Comment Query Loop inner blocks.
 *
 * Besides the block's `postId` context, the result depends on the current
 * user, the `thread_comments`, `page_comments`, `comments_per_page` and
 * `default_comments_page` options, and the `cpage` query var.
 *
 * @since 6.0.0
 *
 * @param WP_Block $block Block instance.
 * @return array Returns the comment query parameters to use with the
 *               WP_Comment_Query constructor.
 */
function build_comment_query_vars_from_block( $block ) {
	$comment_args = array(
		'orderby'       => 'comment_date_gmt',
		'order'         => 'ASC',
		'status'        => 'approve',
		'no_found_rows' => false,
	);

	// Let the current visitor also see their own comments that are awaiting moderation.
	if ( is_user_logged_in() ) {
		$comment_args['include_unapproved'] = array( get_current_user_id() );
	} else {
		$unapproved_email = wp_get_unapproved_comment_author_email();

		if ( $unapproved_email ) {
			$comment_args['include_unapproved'] = array( $unapproved_email );
		}
	}

	if ( ! empty( $block->context['postId'] ) ) {
		$comment_args['post_id'] = (int) $block->context['postId'];
	}

	$comment_args['hierarchical'] = get_option( 'thread_comments' ) ? 'threaded' : false;

	$paging_enabled = get_option( 'page_comments' ) === '1' || get_option( 'page_comments' ) === true;
	if ( ! $paging_enabled ) {
		return $comment_args;
	}

	$per_page     = get_option( 'comments_per_page' );
	$default_page = get_option( 'default_comments_page' );

	if ( ! ( $per_page > 0 ) ) {
		return $comment_args;
	}

	$comment_args['number'] = $per_page;

	$page = (int) get_query_var( 'cpage' );

	if ( $page ) {
		// An explicitly requested comments page always wins.
		$comment_args['paged'] = $page;
	} elseif ( 'oldest' === $default_page ) {
		$comment_args['paged'] = 1;
	} elseif ( 'newest' === $default_page ) {
		// Run the query once with the arguments built so far to learn how many pages exist.
		$max_num_pages = (int) ( new WP_Comment_Query( $comment_args ) )->max_num_pages;

		if ( 0 !== $max_num_pages ) {
			$comment_args['paged'] = $max_num_pages;
		}
	}

	return $comment_args;
}
/**
 * Helper function that returns the proper pagination arrow HTML for
 * `CommentsPaginationNext` and `CommentsPaginationPrevious` blocks based on the
 * provided `paginationArrow` from `CommentsPagination` context.
 *
 * It's used in CommentsPaginationNext and CommentsPaginationPrevious blocks.
 *
 * @since 6.0.0
 *
 * @param WP_Block $block           Block instance.
 * @param string   $pagination_type Optional. Type of the arrow we will be rendering.
 *                                  Accepts 'next' or 'previous'. Default 'next'.
 * @return string|null The pagination arrow HTML or null if there is none.
 */
function get_comments_pagination_arrow( $block, $pagination_type = 'next' ) {
	$arrow_map = array(
		'none'    => '',
		'arrow'   => array(
			'next'     => '→',
			'previous' => '←',
		),
		'chevron' => array(
			'next'     => '»',
			'previous' => '«',
		),
	);

	// Only string values can name an arrow style; anything else is treated as "no arrow".
	if ( empty( $block->context['comments/paginationArrow'] ) || ! is_string( $block->context['comments/paginationArrow'] ) ) {
		return null;
	}

	$arrow_attribute = $block->context['comments/paginationArrow'];

	// Covers unknown styles, the empty `none` entry, and unknown pagination types.
	if ( empty( $arrow_map[ $arrow_attribute ][ $pagination_type ] ) ) {
		return null;
	}

	$arrow         = $arrow_map[ $arrow_attribute ][ $pagination_type ];
	$arrow_classes = "wp-block-comments-pagination-$pagination_type-arrow is-arrow-$arrow_attribute";

	// Reaching this point means both interpolated values matched entries of the fixed map above.
	return "<span class='$arrow_classes' aria-hidden='true'>$arrow</span>";
}
/**
 * Sanitizes the footnotes meta value: each footnote's content is run through
 * wp_filter_post_kses() and each ID through sanitize_key().
 *
 * Footnotes lacking a non-empty `content` or `id` are dropped. Input that does
 * not decode to an array yields an empty string.
 *
 * This function expects slashed data on the footnotes content.
 *
 * @access private
 * @since 6.3.2
 *
 * @param string $footnotes JSON-encoded string of an array containing the content and ID of each footnote.
 * @return string Filtered content without any HTML on the footnote content and with the sanitized ID.
 */
function _wp_filter_post_meta_footnotes( $footnotes ) {
	$decoded = json_decode( $footnotes, true );

	if ( ! is_array( $decoded ) ) {
		return '';
	}

	$sanitized = array();

	foreach ( $decoded as $footnote ) {
		if ( empty( $footnote['content'] ) || empty( $footnote['id'] ) ) {
			continue;
		}

		$sanitized[] = array(
			'id'      => sanitize_key( $footnote['id'] ),
			// wp_filter_post_kses() works on slashed data, hence the slash/unslash round trip.
			'content' => wp_unslash( wp_filter_post_kses( wp_slash( $footnote['content'] ) ) ),
		);
	}

	return wp_json_encode( $sanitized );
}
/**
 * Hooks the footnotes sanitizer onto the footnotes meta field.
 *
 * @access private
 * @since 6.3.2
 */
function _wp_footnotes_kses_init_filters() {
	$hook_name = 'sanitize_post_meta_footnotes';

	add_filter( $hook_name, '_wp_filter_post_meta_footnotes' );
}
/**
 * Unhooks the footnotes sanitizer from the footnotes meta field.
 *
 * @access private
 * @since 6.3.2
 */
function _wp_footnotes_remove_filters() {
	$hook_name = 'sanitize_post_meta_footnotes';

	remove_filter( $hook_name, '_wp_filter_post_meta_footnotes' );
}
/**
 * Registers the filter of footnotes meta field if the user does not have `unfiltered_html` capability.
 *
 * Any previously registered filter is removed first, so the outcome depends
 * only on the current user's capabilities.
 *
 * @access private
 * @since 6.3.2
 */
function _wp_footnotes_kses_init() {
	_wp_footnotes_remove_filters();

	if ( current_user_can( 'unfiltered_html' ) ) {
		return;
	}

	_wp_footnotes_kses_init_filters();
}
/**
 * Initializes the filters for footnotes meta field when imported data should be filtered.
 *
 * This filter is the last one being executed on {@see 'force_filtered_html_on_import'}.
 * A truthy input means an import is in progress and kses must be enabled regardless
 * of the user's capabilities, so _wp_footnotes_kses_init_filters() is called.
 * The input is always returned unchanged.
 *
 * @access private
 * @since 6.3.2
 *
 * @param mixed $arg Input argument of the filter; evaluated for truthiness.
 * @return mixed Input argument of the filter.
 */
function _wp_footnotes_force_filtered_html_on_import_filter( $arg ) {
	// Nothing to do unless filtering is being forced for the import.
	if ( ! $arg ) {
		return $arg;
	}

	// `force_filtered_html_on_import` is on: enable the footnotes kses filters.
	_wp_footnotes_kses_init_filters();

	return $arg;
}
|